1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/sdt.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 67 #include <inet/common.h> 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_if.h> 73 #include <inet/ip_multi.h> 74 #include <inet/ip_ndp.h> 75 #include <inet/mi.h> 76 #include <inet/mib2.h> 77 #include <inet/nd.h> 78 #include <inet/optcom.h> 79 #include <inet/snmpcom.h> 80 #include <inet/kstatcom.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipclassifier.h> 83 #include <inet/ipsec_impl.h> 84 #include <inet/ipp_common.h> 85 86 /* 87 * The ipsec_info.h header file is here since it has the definition for the 88 * M_CTL message types used by IP to convey information to the ULP. The 89 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
90 */ 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 94 #include <sys/tsol/label.h> 95 #include <sys/tsol/tnet.h> 96 #include <rpc/pmap_prot.h> 97 98 /* 99 * Synchronization notes: 100 * 101 * UDP uses a combination of its internal perimeter, a global lock and 102 * a set of bind hash locks to protect its data structures. Please see 103 * the note above udp_mode_assertions for details about the internal 104 * perimeter. 105 * 106 * When a UDP endpoint is bound to a local port, it is inserted into 107 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 108 * The size of the array is controlled by the udp_bind_fanout_size variable. 109 * This variable can be changed in /etc/system if the default value is 110 * not large enough. Each bind hash bucket is protected by a per bucket 111 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 112 * structure. An UDP endpoint is removed from the bind hash list only 113 * when it is being unbound or being closed. The per bucket lock also 114 * protects a UDP endpoint's state changes. 115 * 116 * Plumbing notes: 117 * 118 * Both udp and ip are merged, but the streams plumbing is kept unchanged 119 * in that udp is always pushed atop /dev/ip. This is done to preserve 120 * backwards compatibility for certain applications which rely on such 121 * plumbing geometry to do things such as issuing I_POP on the stream 122 * in order to obtain direct access to /dev/ip, etc. 123 * 124 * All UDP processings happen in the /dev/ip instance; the udp module 125 * instance does not possess any state about the endpoint, and merely 126 * acts as a dummy module whose presence is to keep the streams plumbing 127 * appearance unchanged. At open time /dev/ip allocates a conn_t that 128 * happens to embed a udp_t. This stays dormant until the time udp is 129 * pushed, which indicates to /dev/ip that it must convert itself from 130 * an IP to a UDP endpoint. 
131 * 132 * We only allow for the following plumbing cases: 133 * 134 * Normal: 135 * /dev/ip is first opened and later udp is pushed directly on top. 136 * This is the default action that happens when a udp socket or 137 * /dev/udp is opened. The conn_t created by /dev/ip instance is 138 * now shared and is marked with IPCL_UDP. 139 * 140 * SNMP-only: 141 * udp is pushed on top of a module other than /dev/ip. When this 142 * happens it will support only SNMP semantics. A new conn_t is 143 * allocated and marked with IPCL_UDPMOD. 144 * 145 * The above cases imply that we don't support any intermediate module to 146 * reside in between /dev/ip and udp -- in fact, we never supported such 147 * scenario in the past as the inter-layer communication semantics have 148 * always been private. Also note that the normal case allows for SNMP 149 * requests to be processed in addition to the rest of UDP operations. 150 * 151 * The normal case plumbing is depicted by the following diagram: 152 * 153 * +---------------+---------------+ 154 * | | | udp 155 * | udp_wq | udp_rq | 156 * | | UDP_RD | 157 * | | | 158 * +---------------+---------------+ 159 * | ^ 160 * v | 161 * +---------------+---------------+ 162 * | | | /dev/ip 163 * | ip_wq | ip_rq | conn_t 164 * | UDP_WR | | 165 * | | | 166 * +---------------+---------------+ 167 * 168 * Messages arriving at udp_wq from above will end up in ip_wq before 169 * it gets processed, i.e. udp write entry points will advance udp_wq 170 * and use its q_next value as ip_wq in order to use the conn_t that 171 * is stored in its q_ptr. Likewise, messages generated by ip to the 172 * module above udp will appear as if they are originated from udp_rq, 173 * i.e. putnext() calls to the module above udp is done using the 174 * udp_rq instead of ip_rq in order to avoid udp_rput() which does 175 * nothing more than calling putnext(). 176 * 177 * The above implies the following rule of thumb: 178 * 179 * 1. 
udp_t is obtained from conn_t, which is created by the /dev/ip
 *	instance and is stored in q_ptr of both ip_wq and ip_rq.  There
 *	is no direct reference to conn_t from either udp_wq or udp_rq.
 *
 * 2. Write-side entry points of udp can obtain the conn_t via the
 *	Q_TO_CONN() macro, using the queue value obtained from UDP_WR().
 *
 * 3. While in /dev/ip context, putnext() to the module above udp can
 *	be done by supplying the queue value obtained from UDP_RD().
 *
 */

/* Queue translation helpers; both are defined further down in this file. */
static queue_t *UDP_WR(queue_t *);
static queue_t *UDP_RD(queue_t *);

struct kmem_cache *udp_cache;

/* For /etc/system control */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;

/*
 * Canned replies for the Named Dispatch (ndd) report routines:
 * NDD_TOO_QUICK_MSG is returned when a non-privileged caller polls
 * faster than the configured interval, NDD_OUT_OF_BUF_MSG when the
 * report buffer cannot be allocated.
 */
#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"

/*
 * Option processing attrs.
 *
 * Passed to udp_unitdata_opt_process().  The packet-attribute pointer
 * is a union because a given request is either IPv4 or IPv6; the two
 * #defines below let callers name the member directly without spelling
 * out the union.
 */
typedef struct udpattrs_s {
	union {
		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
		ip4_pkt_t	*udpattr_ipp4;	/* For V4 */
	} udpattr_ippu;
#define	udpattr_ipp6 udpattr_ippu.udpattr_ipp6
#define	udpattr_ipp4 udpattr_ippu.udpattr_ipp4
	mblk_t		*udpattr_mb;
	boolean_t	udpattr_credset;
} udpattrs_t;

/* Forward declarations of file-local routines. */
static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static int	udp_build_hdrs(queue_t *q, udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_close(queue_t *q);
static void	udp_connect(queue_t *q, mblk_t *mp);
static void	udp_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(queue_t *q, mblk_t *mp);
static void	udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
		    t_scalar_t addr_length);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
		    cred_t *cr);
static void	udp_report_item(mblk_t *mp, udp_t *udp);
static void	udp_rput(queue_t *q, mblk_t *mp);
static void	udp_rput_other(queue_t *, mblk_t *);
static int	udp_rinfop(queue_t *q, infod_t *dp);
static int	udp_rrw(queue_t *q, struiod_t *dp);
static void	udp_rput_bind_ack(queue_t *q, mblk_t *mp);
static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha);
static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
		    t_scalar_t destlen, t_scalar_t err);
static void	udp_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
		    boolean_t random);
static void	udp_wput(queue_t *q, mblk_t *mp);
static mblk_t	*udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst,
		    uint16_t port, uint_t srcid, int *error);
static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
		    int *error);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
		    socklen_t addrlen);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	udp_stack_fini(netstackid_t stackid, void *arg);

static void	*udp_kstat_init(netstackid_t stackid);
static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
static void	udp_kstat2_fini(netstackid_t, kstat_t *);
static int	udp_kstat_update(kstat_t *kp, int rw);
static void	udp_input_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2);

static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
		    uint_t pkt_len);
static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void	udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t);
static void	udp_exit(conn_t *);
static void	udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t);
#ifdef DEBUG
static void	udp_mode_assertions(udp_t *, int);
#endif /* DEBUG */

major_t UDP6_MAJ;
#define	UDP6 "udp6"

/*
 * Default flow-control watermarks.  They seed udp_info below and the
 * udp_xmit_hiwat, udp_xmit_lowat and udp_recv_hiwat entries of udp_param_arr.
 */
#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024

/* Module information shared by every qinit table in this file. */
static struct module_info udp_info = {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/* Read-side queue entry points for the normal (udp over /dev/ip) case. */
static struct qinit udp_rinit = {
	(pfi_t)udp_rput, NULL, udp_open, udp_close, NULL,
	&udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

/* Write-side queue entry points for the normal case. */
static struct qinit udp_winit = {
	(pfi_t)udp_wput, NULL, NULL, NULL, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Support for just SNMP if UDP is not pushed directly over device IP */
struct qinit udp_snmp_rinit = {
	(pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

struct qinit udp_snmp_winit = {
	(pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* STREAMS table tying the read and write qinit structures together. */
struct streamtab udpinfo = {
	&udp_rinit, &udp_winit
};

static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

/* Largest UDP payload that fits in an IPv4 packet with no IP options. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest UDP payload that fits behind a bare IPv6 header. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of udp_g_t_info_ack_ipv4. */
static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /*min		max		value		name */
 { 0L,		256,		32,		"udp_wroff_extra" },
 { 1L,		255,		255,		"udp_ipv4_ttl" },
 { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
 { 0,		1,		1,		"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
 { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
 { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
 { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
402 * On non-clustered systems these vectors must always be NULL 403 */ 404 405 void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family, 406 uint8_t *laddrp, in_port_t lport) = NULL; 407 void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, 408 uint8_t *laddrp, in_port_t lport) = NULL; 409 410 typedef union T_primitives *t_primp_t; 411 412 #define UDP_ENQUEUE_MP(udp, mp, proc, tag) { \ 413 ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL); \ 414 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 415 (mp)->b_queue = (queue_t *)((uintptr_t)tag); \ 416 (mp)->b_prev = (mblk_t *)proc; \ 417 if ((udp)->udp_mphead == NULL) \ 418 (udp)->udp_mphead = (mp); \ 419 else \ 420 (udp)->udp_mptail->b_next = (mp); \ 421 (udp)->udp_mptail = (mp); \ 422 (udp)->udp_mpcount++; \ 423 } 424 425 #define UDP_READERS_INCREF(udp) { \ 426 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 427 (udp)->udp_reader_count++; \ 428 } 429 430 #define UDP_READERS_DECREF(udp) { \ 431 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 432 (udp)->udp_reader_count--; \ 433 if ((udp)->udp_reader_count == 0) \ 434 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 435 } 436 437 #define UDP_SQUEUE_DECREF(udp) { \ 438 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 439 (udp)->udp_squeue_count--; \ 440 if ((udp)->udp_squeue_count == 0) \ 441 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 442 } 443 444 /* 445 * Notes on UDP endpoint synchronization: 446 * 447 * UDP needs exclusive operation on a per endpoint basis, when executing 448 * functions that modify the endpoint state. udp_rput_other() deals with 449 * packets with IP options, and processing these packets end up having 450 * to update the endpoint's option related state. udp_wput_other() deals 451 * with control operations from the top, e.g. connect() that needs to 452 * update the endpoint state. These could be synchronized using locks, 453 * but the current version uses squeues for this purpose. 
squeues may 454 * give performance improvement for certain cases such as connected UDP 455 * sockets; thus the framework allows for using squeues. 456 * 457 * The perimeter routines are described as follows: 458 * 459 * udp_enter(): 460 * Enter the UDP endpoint perimeter. 461 * 462 * udp_become_writer(): 463 * Become exclusive on the UDP endpoint. Specifies a function 464 * that will be called exclusively either immediately or later 465 * when the perimeter is available exclusively. 466 * 467 * udp_exit(): 468 * Exit the UDP perimeter. 469 * 470 * Entering UDP from the top or from the bottom must be done using 471 * udp_enter(). No lock must be held while attempting to enter the UDP 472 * perimeter. When finished, udp_exit() must be called to get out of 473 * the perimeter. 474 * 475 * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode, 476 * multiple threads may enter a UDP endpoint concurrently. This is used 477 * for sending and/or receiving normal data. Control operations and other 478 * special cases call udp_become_writer() to become exclusive on a per 479 * endpoint basis and this results in transitioning to SQUEUE mode. squeue 480 * by definition serializes access to the conn_t. When there are no more 481 * pending messages on the squeue for the UDP connection, the endpoint 482 * reverts to MT_HOT mode. During the interregnum when not all MT threads 483 * of an endpoint have finished, messages are queued in the UDP endpoint 484 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode. 485 * 486 * These modes have the following analogs: 487 * 488 * UDP_MT_HOT/udp_reader_count==0 none 489 * UDP_MT_HOT/udp_reader_count>0 RW_READ_LOCK 490 * UDP_MT_QUEUED RW_WRITE_WANTED 491 * UDP_SQUEUE or UDP_QUEUED_SQUEUE RW_WRITE_LOCKED 492 * 493 * Stable modes: UDP_MT_HOT, UDP_SQUEUE 494 * Transient modes: UDP_MT_QUEUED, UDP_QUEUED_SQUEUE 495 * 496 * While in stable modes, UDP keeps track of the number of threads 497 * operating on the endpoint. 
The udp_reader_count variable represents
 * the number of threads entering the endpoint as readers while it is
 * in UDP_MT_HOT mode.  Transitioning to UDP_SQUEUE happens when there
 * is only a single reader, i.e. when this counter drops to 1.  Likewise,
 * udp_squeue_count represents the number of threads operating on the
 * endpoint's squeue while it is in UDP_SQUEUE mode.  The mode transition
 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e.
 * when this counter drops to 0.
 *
 * The default mode is set to UDP_MT_HOT and UDP alternates between
 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below.
 *
 * Mode transition:
 * ----------------------------------------------------------------
 * old mode		Event				New mode
 * ----------------------------------------------------------------
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_SQUEUE
 *			and udp_reader_count == 1
 *
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_MT_QUEUED
 *			and udp_reader_count > 1
 *
 * UDP_MT_QUEUED	udp_reader_count drops to zero	UDP_QUEUED_SQUEUE
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_SQUEUE
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count != 0
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_MT_HOT
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count
 *			drops to zero
 *
 * UDP_SQUEUE		udp_squeue_count drops to zero	UDP_MT_HOT
 * ----------------------------------------------------------------
 */

/*
 * Given a queue of the udp module instance, return the q_next of its
 * write side, i.e. the write queue of the instance plumbed below it.
 * The assertions check that the udp queues carry no q_ptr while the
 * queue below carries a conn_t marked as a UDP endpoint.
 */
static queue_t *
UDP_WR(queue_t *q)
{
	ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL);
	ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next)));

	return (_WR(q)->q_next);
}

/*
 * Given a queue that carries the UDP conn_t in its q_ptr, return the
 * q_next of its read side, i.e. the read queue of the instance plumbed
 * above it.  The assertions check that the queue above has no q_ptr.
 */
static queue_t *
UDP_RD(queue_t *q)
{
	ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(q)));
	ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL);

	return (_RD(q)->q_next);
}

/* The mode invariant checks compile away entirely on non-DEBUG kernels. */
#ifdef DEBUG
#define	UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller)
#else
#define	UDP_MODE_ASSERTIONS(udp, caller)
#endif

/* Invariants */
#ifdef DEBUG

/* Number of times each mode was observed, in the order of the cases below. */
uint32_t udp_count[4];

/* Context of udp_mode_assertions */
#define	UDP_ENTER		1
#define	UDP_BECOME_WRITER	2
#define	UDP_EXIT		3

/*
 * Check the per-mode invariants of the UDP perimeter.  'caller' is one of
 * the context values above and relaxes the checks that cannot hold when a
 * thread is only just entering.  conn_lock must be held.
 */
static void
udp_mode_assertions(udp_t *udp, int caller)
{
	ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock));

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		/*
		 * Messages have not yet been enqueued on the internal queue,
		 * otherwise we would have switched to UDP_MT_QUEUED. Likewise
		 * by definition, there can't be any messages enqueued on the
		 * squeue. The UDP could be quiescent, so udp_reader_count
		 * could be zero at entry.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 &&
		    udp->udp_squeue_count == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0);
		udp_count[0]++;
		break;

	case UDP_MT_QUEUED:
		/*
		 * The last MT thread to exit the udp perimeter empties the
		 * internal queue and then switches the UDP to
		 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED
		 * mode, it means there must be at least 1 MT thread still in
		 * the perimeter and at least 1 message on the internal queue.
		 */
		ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL &&
		    udp->udp_mpcount != 0 && udp->udp_squeue_count == 0);
		udp_count[1]++;
		break;

	case UDP_QUEUED_SQUEUE:
		/*
		 * The switch has happened from MT to SQUEUE. So there can't
		 * be any MT threads. Messages could still pile up on the
		 * internal queue until the transition is complete and we
		 * move to UDP_SQUEUE mode. We can't assert on nonzero
		 * udp_squeue_count since the squeue could drain any time.
		 */
		ASSERT(udp->udp_reader_count == 0);
		udp_count[2]++;
		break;

	case UDP_SQUEUE:
		/*
		 * The transition is complete. There can't be any messages on
		 * the internal queue. The udp could be quiescent or the squeue
		 * could drain any time, so we can't assert on nonzero
		 * udp_squeue_count during entry. Nor can we assert that
		 * udp_reader_count is zero, since, a reader thread could have
		 * directly become writer in line by calling udp_become_writer
		 * without going through the queued states.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0);
		udp_count[3]++;
		break;
	}
}
#endif

/*
 * Enter the UDP perimeter of connp and arrange for proc to handle mp.
 *
 * If the conn is closing the message is freed.  Otherwise, depending on
 * the current mode, proc is called directly as a reader (UDP_MT_HOT),
 * the message is handed to the squeue (UDP_SQUEUE), or it is appended to
 * the internal queue so that ordering is preserved while a mode switch
 * is in progress (UDP_MT_QUEUED, UDP_QUEUED_SQUEUE).  conn_lock is taken
 * and released here and is never held across the call to proc or to
 * squeue_enter().
 */
#define	_UDP_ENTER(connp, mp, proc, tag) {				\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	if ((connp)->conn_state_flags & CONN_CLOSING) {			\
		mutex_exit(&(connp)->conn_lock);			\
		freemsg(mp);						\
	} else {							\
		UDP_MODE_ASSERTIONS(_udp, UDP_ENTER);			\
									\
		switch (_udp->udp_mode) {				\
		case UDP_MT_HOT:					\
			/* We can execute as reader right away. */	\
			UDP_READERS_INCREF(_udp);			\
			mutex_exit(&(connp)->conn_lock);		\
			(*(proc))(connp, mp, (connp)->conn_sqp);	\
			break;						\
									\
		case UDP_SQUEUE:					\
			/*						\
			 * We are in squeue mode, send the		\
			 * packet to the squeue				\
			 */						\
			_udp->udp_squeue_count++;			\
			CONN_INC_REF_LOCKED(connp);			\
			mutex_exit(&(connp)->conn_lock);		\
			squeue_enter((connp)->conn_sqp, mp, proc,	\
			    connp, tag);				\
			break;						\
									\
		case UDP_MT_QUEUED:					\
		case UDP_QUEUED_SQUEUE:					\
			/*						\
			 * Some messages may have been enqueued		\
			 * ahead of us.  Enqueue the new message	\
			 * at the tail of the internal queue to		\
			 * preserve message ordering.			\
			 */						\
			UDP_ENQUEUE_MP(_udp, mp, proc, tag);		\
			mutex_exit(&(connp)->conn_lock);		\
			break;						\
		}							\
	}								\
}

/* Function form of _UDP_ENTER(). */
static void
udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	_UDP_ENTER(connp, mp, proc, tag);
}

/*
 * Become exclusive on the endpoint and run proc on mp, either now or
 * once the perimeter can be held exclusively.
 *
 * When the caller is the only reader, the endpoint switches to
 * UDP_SQUEUE at once and the message goes through the squeue; note that
 * udp_reader_count is left untouched on this path.  When other readers
 * are present the message is parked on the internal queue and the mode
 * becomes (or stays) UDP_MT_QUEUED.  When the endpoint is already in one
 * of the squeue modes, proc is simply called in line.
 */
static void
udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	udp_t	*udp;

	udp = connp->conn_udp;

	mutex_enter(&connp->conn_lock);

	UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER);

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		if (udp->udp_reader_count == 1) {
			/*
			 * We are the only MT thread. Switch to squeue mode
			 * immediately.
			 */
			udp->udp_mode = UDP_SQUEUE;
			udp->udp_squeue_count = 1;
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			squeue_enter(connp->conn_sqp, mp, proc, connp, tag);
			return;
		}
		/* FALLTHRU */

	case UDP_MT_QUEUED:
		/* Enqueue the packet internally in UDP */
		udp->udp_mode = UDP_MT_QUEUED;
		UDP_ENQUEUE_MP(udp, mp, proc, tag);
		mutex_exit(&connp->conn_lock);
		return;

	case UDP_SQUEUE:
	case UDP_QUEUED_SQUEUE:
		/*
		 * We are already exclusive. i.e. we are already
		 * writer. Simply call the desired function.
		 */
		udp->udp_squeue_count++;
		mutex_exit(&connp->conn_lock);
		(*proc)(connp, mp, connp->conn_sqp);
		return;
	}
}

/*
 * Transition from MT mode to SQUEUE mode, when the last MT thread
 * is exiting the UDP perimeter. Move all messages from the internal
 * udp queue to the squeue.
A better way would be to move all the 735 * messages in one shot, this needs more support from the squeue framework 736 */ 737 static void 738 udp_switch_to_squeue(udp_t *udp) 739 { 740 mblk_t *mp; 741 mblk_t *mp_next; 742 sqproc_t proc; 743 uint8_t tag; 744 conn_t *connp = udp->udp_connp; 745 746 ASSERT(MUTEX_HELD(&connp->conn_lock)); 747 ASSERT(udp->udp_mode == UDP_MT_QUEUED); 748 while (udp->udp_mphead != NULL) { 749 mp = udp->udp_mphead; 750 udp->udp_mphead = NULL; 751 udp->udp_mptail = NULL; 752 udp->udp_mpcount = 0; 753 udp->udp_mode = UDP_QUEUED_SQUEUE; 754 mutex_exit(&connp->conn_lock); 755 /* 756 * It is best not to hold any locks across the calls 757 * to squeue functions. Since we drop the lock we 758 * need to go back and check the udp_mphead once again 759 * after the squeue_fill and hence the while loop at 760 * the top of this function 761 */ 762 for (; mp != NULL; mp = mp_next) { 763 mp_next = mp->b_next; 764 proc = (sqproc_t)mp->b_prev; 765 tag = (uint8_t)((uintptr_t)mp->b_queue); 766 mp->b_next = NULL; 767 mp->b_prev = NULL; 768 mp->b_queue = NULL; 769 CONN_INC_REF(connp); 770 udp->udp_squeue_count++; 771 squeue_fill(connp->conn_sqp, mp, proc, connp, 772 tag); 773 } 774 mutex_enter(&connp->conn_lock); 775 } 776 /* 777 * udp_squeue_count of zero implies that the squeue has drained 778 * even before we arrived here (i.e. after the squeue_fill above) 779 */ 780 udp->udp_mode = (udp->udp_squeue_count != 0) ? 
781 UDP_SQUEUE : UDP_MT_HOT; 782 } 783 784 #define _UDP_EXIT(connp) { \ 785 udp_t *_udp = (connp)->conn_udp; \ 786 \ 787 mutex_enter(&(connp)->conn_lock); \ 788 UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \ 789 \ 790 switch (_udp->udp_mode) { \ 791 case UDP_MT_HOT: \ 792 UDP_READERS_DECREF(_udp); \ 793 mutex_exit(&(connp)->conn_lock); \ 794 break; \ 795 \ 796 case UDP_SQUEUE: \ 797 UDP_SQUEUE_DECREF(_udp); \ 798 if (_udp->udp_squeue_count == 0) \ 799 _udp->udp_mode = UDP_MT_HOT; \ 800 mutex_exit(&(connp)->conn_lock); \ 801 break; \ 802 \ 803 case UDP_MT_QUEUED: \ 804 /* \ 805 * If this is the last MT thread, we need to \ 806 * switch to squeue mode \ 807 */ \ 808 UDP_READERS_DECREF(_udp); \ 809 if (_udp->udp_reader_count == 0) \ 810 udp_switch_to_squeue(_udp); \ 811 mutex_exit(&(connp)->conn_lock); \ 812 break; \ 813 \ 814 case UDP_QUEUED_SQUEUE: \ 815 UDP_SQUEUE_DECREF(_udp); \ 816 /* \ 817 * Even if the udp_squeue_count drops to zero, we \ 818 * don't want to change udp_mode to UDP_MT_HOT here. \ 819 * The thread in udp_switch_to_squeue will take care \ 820 * of the transition to UDP_MT_HOT, after emptying \ 821 * any more new messages that have been enqueued in \ 822 * udp_mphead. \ 823 */ \ 824 mutex_exit(&(connp)->conn_lock); \ 825 break; \ 826 } \ 827 } 828 829 static void 830 udp_exit(conn_t *connp) 831 { 832 _UDP_EXIT(connp); 833 } 834 835 /* 836 * Return the next anonymous port in the privileged port range for 837 * bind checking. 838 * 839 * Trusted Extension (TX) notes: TX allows administrator to mark or 840 * reserve ports as Multilevel ports (MLP). MLP has special function 841 * on TX systems. Once a port is made MLP, it's not available as 842 * ordinary port. This creates "holes" in the port name space. It 843 * may be necessary to skip the "holes" find a suitable anon port. 
844 */ 845 static in_port_t 846 udp_get_next_priv_port(udp_t *udp) 847 { 848 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 849 in_port_t nextport; 850 boolean_t restart = B_FALSE; 851 udp_stack_t *us = udp->udp_us; 852 853 retry: 854 if (next_priv_port < us->us_min_anonpriv_port || 855 next_priv_port >= IPPORT_RESERVED) { 856 next_priv_port = IPPORT_RESERVED - 1; 857 if (restart) 858 return (0); 859 restart = B_TRUE; 860 } 861 862 if (is_system_labeled() && 863 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 864 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 865 next_priv_port = nextport; 866 goto retry; 867 } 868 869 return (next_priv_port--); 870 } 871 872 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 873 /* ARGSUSED */ 874 static int 875 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 876 { 877 udp_fanout_t *udpf; 878 int i; 879 zoneid_t zoneid; 880 conn_t *connp; 881 udp_t *udp; 882 udp_stack_t *us; 883 884 connp = Q_TO_CONN(q); 885 udp = connp->conn_udp; 886 us = udp->udp_us; 887 888 /* Refer to comments in udp_status_report(). */ 889 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 890 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 891 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 892 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 893 return (0); 894 } 895 } 896 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 897 /* The following may work even if we cannot get a large buf. */ 898 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 899 return (0); 900 } 901 902 (void) mi_mpprintf(mp, 903 "UDP " MI_COL_HDRPAD_STR 904 /* 12345678[89ABCDEF] */ 905 " zone lport src addr dest addr port state"); 906 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 907 908 zoneid = connp->conn_zoneid; 909 910 for (i = 0; i < us->us_bind_fanout_size; i++) { 911 udpf = &us->us_bind_fanout[i]; 912 mutex_enter(&udpf->uf_lock); 913 914 /* Print the hash index. 
*/ 915 udp = udpf->uf_udp; 916 if (zoneid != GLOBAL_ZONEID) { 917 /* skip to first entry in this zone; might be none */ 918 while (udp != NULL && 919 udp->udp_connp->conn_zoneid != zoneid) 920 udp = udp->udp_bind_hash; 921 } 922 if (udp != NULL) { 923 uint_t print_len, buf_len; 924 925 buf_len = mp->b_cont->b_datap->db_lim - 926 mp->b_cont->b_wptr; 927 print_len = snprintf((char *)mp->b_cont->b_wptr, 928 buf_len, "%d\n", i); 929 if (print_len < buf_len) { 930 mp->b_cont->b_wptr += print_len; 931 } else { 932 mp->b_cont->b_wptr += buf_len; 933 } 934 for (; udp != NULL; udp = udp->udp_bind_hash) { 935 if (zoneid == GLOBAL_ZONEID || 936 zoneid == udp->udp_connp->conn_zoneid) 937 udp_report_item(mp->b_cont, udp); 938 } 939 } 940 mutex_exit(&udpf->uf_lock); 941 } 942 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 943 return (0); 944 } 945 946 /* 947 * Hash list removal routine for udp_t structures. 948 */ 949 static void 950 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 951 { 952 udp_t *udpnext; 953 kmutex_t *lockp; 954 udp_stack_t *us = udp->udp_us; 955 956 if (udp->udp_ptpbhn == NULL) 957 return; 958 959 /* 960 * Extract the lock pointer in case there are concurrent 961 * hash_remove's for this instance. 
962 */ 963 ASSERT(udp->udp_port != 0); 964 if (!caller_holds_lock) { 965 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 966 us->us_bind_fanout_size)].uf_lock; 967 ASSERT(lockp != NULL); 968 mutex_enter(lockp); 969 } 970 if (udp->udp_ptpbhn != NULL) { 971 udpnext = udp->udp_bind_hash; 972 if (udpnext != NULL) { 973 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 974 udp->udp_bind_hash = NULL; 975 } 976 *udp->udp_ptpbhn = udpnext; 977 udp->udp_ptpbhn = NULL; 978 } 979 if (!caller_holds_lock) { 980 mutex_exit(lockp); 981 } 982 } 983 984 static void 985 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 986 { 987 udp_t **udpp; 988 udp_t *udpnext; 989 990 ASSERT(MUTEX_HELD(&uf->uf_lock)); 991 if (udp->udp_ptpbhn != NULL) { 992 udp_bind_hash_remove(udp, B_TRUE); 993 } 994 udpp = &uf->uf_udp; 995 udpnext = udpp[0]; 996 if (udpnext != NULL) { 997 /* 998 * If the new udp bound to the INADDR_ANY address 999 * and the first one in the list is not bound to 1000 * INADDR_ANY we skip all entries until we find the 1001 * first one bound to INADDR_ANY. 1002 * This makes sure that applications binding to a 1003 * specific address get preference over those binding to 1004 * INADDR_ANY. 1005 */ 1006 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 1007 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 1008 while ((udpnext = udpp[0]) != NULL && 1009 !V6_OR_V4_INADDR_ANY( 1010 udpnext->udp_bound_v6src)) { 1011 udpp = &(udpnext->udp_bind_hash); 1012 } 1013 if (udpnext != NULL) 1014 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1015 } else { 1016 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1017 } 1018 } 1019 udp->udp_bind_hash = udpnext; 1020 udp->udp_ptpbhn = udpp; 1021 udpp[0] = udp; 1022 } 1023 1024 /* 1025 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 1026 * passed to udp_wput. 1027 * It associates a port number and local address with the stream. 
1028 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 1029 * protocol type (IPPROTO_UDP) placed in the message following the address. 1030 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 1031 * (Called as writer.) 1032 * 1033 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 1034 * without setting SO_REUSEADDR. This is needed so that they 1035 * can be viewed as two independent transport protocols. 1036 * However, anonymouns ports are allocated from the same range to avoid 1037 * duplicating the us->us_next_port_to_try. 1038 */ 1039 static void 1040 udp_bind(queue_t *q, mblk_t *mp) 1041 { 1042 sin_t *sin; 1043 sin6_t *sin6; 1044 mblk_t *mp1; 1045 in_port_t port; /* Host byte order */ 1046 in_port_t requested_port; /* Host byte order */ 1047 struct T_bind_req *tbr; 1048 int count; 1049 in6_addr_t v6src; 1050 boolean_t bind_to_req_port_only; 1051 int loopmax; 1052 udp_fanout_t *udpf; 1053 in_port_t lport; /* Network byte order */ 1054 zoneid_t zoneid; 1055 conn_t *connp; 1056 udp_t *udp; 1057 boolean_t is_inaddr_any; 1058 mlp_type_t addrtype, mlptype; 1059 udp_stack_t *us; 1060 1061 connp = Q_TO_CONN(q); 1062 udp = connp->conn_udp; 1063 us = udp->udp_us; 1064 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 1065 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1066 "udp_bind: bad req, len %u", 1067 (uint_t)(mp->b_wptr - mp->b_rptr)); 1068 udp_err_ack(q, mp, TPROTO, 0); 1069 return; 1070 } 1071 1072 if (udp->udp_state != TS_UNBND) { 1073 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1074 "udp_bind: bad state, %u", udp->udp_state); 1075 udp_err_ack(q, mp, TOUTSTATE, 0); 1076 return; 1077 } 1078 /* 1079 * Reallocate the message to make sure we have enough room for an 1080 * address and the protocol type. 
1081 */ 1082 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 1083 if (!mp1) { 1084 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1085 return; 1086 } 1087 1088 mp = mp1; 1089 tbr = (struct T_bind_req *)mp->b_rptr; 1090 switch (tbr->ADDR_length) { 1091 case 0: /* Request for a generic port */ 1092 tbr->ADDR_offset = sizeof (struct T_bind_req); 1093 if (udp->udp_family == AF_INET) { 1094 tbr->ADDR_length = sizeof (sin_t); 1095 sin = (sin_t *)&tbr[1]; 1096 *sin = sin_null; 1097 sin->sin_family = AF_INET; 1098 mp->b_wptr = (uchar_t *)&sin[1]; 1099 } else { 1100 ASSERT(udp->udp_family == AF_INET6); 1101 tbr->ADDR_length = sizeof (sin6_t); 1102 sin6 = (sin6_t *)&tbr[1]; 1103 *sin6 = sin6_null; 1104 sin6->sin6_family = AF_INET6; 1105 mp->b_wptr = (uchar_t *)&sin6[1]; 1106 } 1107 port = 0; 1108 break; 1109 1110 case sizeof (sin_t): /* Complete IPv4 address */ 1111 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 1112 sizeof (sin_t)); 1113 if (sin == NULL || !OK_32PTR((char *)sin)) { 1114 udp_err_ack(q, mp, TSYSERR, EINVAL); 1115 return; 1116 } 1117 if (udp->udp_family != AF_INET || 1118 sin->sin_family != AF_INET) { 1119 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1120 return; 1121 } 1122 port = ntohs(sin->sin_port); 1123 break; 1124 1125 case sizeof (sin6_t): /* complete IPv6 address */ 1126 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 1127 sizeof (sin6_t)); 1128 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1129 udp_err_ack(q, mp, TSYSERR, EINVAL); 1130 return; 1131 } 1132 if (udp->udp_family != AF_INET6 || 1133 sin6->sin6_family != AF_INET6) { 1134 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1135 return; 1136 } 1137 port = ntohs(sin6->sin6_port); 1138 break; 1139 1140 default: /* Invalid request */ 1141 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1142 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 1143 udp_err_ack(q, mp, TBADADDR, 0); 1144 return; 1145 } 1146 1147 requested_port = port; 1148 1149 if (requested_port == 0 || 
tbr->PRIM_type == O_T_BIND_REQ) 1150 bind_to_req_port_only = B_FALSE; 1151 else /* T_BIND_REQ and requested_port != 0 */ 1152 bind_to_req_port_only = B_TRUE; 1153 1154 if (requested_port == 0) { 1155 /* 1156 * If the application passed in zero for the port number, it 1157 * doesn't care which port number we bind to. Get one in the 1158 * valid range. 1159 */ 1160 if (udp->udp_anon_priv_bind) { 1161 port = udp_get_next_priv_port(udp); 1162 } else { 1163 port = udp_update_next_port(udp, 1164 us->us_next_port_to_try, B_TRUE); 1165 } 1166 } else { 1167 /* 1168 * If the port is in the well-known privileged range, 1169 * make sure the caller was privileged. 1170 */ 1171 int i; 1172 boolean_t priv = B_FALSE; 1173 1174 if (port < us->us_smallest_nonpriv_port) { 1175 priv = B_TRUE; 1176 } else { 1177 for (i = 0; i < us->us_num_epriv_ports; i++) { 1178 if (port == us->us_epriv_ports[i]) { 1179 priv = B_TRUE; 1180 break; 1181 } 1182 } 1183 } 1184 1185 if (priv) { 1186 cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); 1187 1188 if (secpolicy_net_privaddr(cr, port) != 0) { 1189 udp_err_ack(q, mp, TACCES, 0); 1190 return; 1191 } 1192 } 1193 } 1194 1195 if (port == 0) { 1196 udp_err_ack(q, mp, TNOADDR, 0); 1197 return; 1198 } 1199 1200 /* 1201 * Copy the source address into our udp structure. This address 1202 * may still be zero; if so, IP will fill in the correct address 1203 * each time an outbound packet is passed to it. 
1204 */ 1205 if (udp->udp_family == AF_INET) { 1206 ASSERT(sin != NULL); 1207 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1208 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1209 udp->udp_ip_snd_options_len; 1210 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 1211 } else { 1212 ASSERT(sin6 != NULL); 1213 v6src = sin6->sin6_addr; 1214 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 1215 udp->udp_ipversion = IPV4_VERSION; 1216 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1217 UDPH_SIZE + udp->udp_ip_snd_options_len; 1218 } else { 1219 udp->udp_ipversion = IPV6_VERSION; 1220 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1221 } 1222 } 1223 1224 /* 1225 * If udp_reuseaddr is not set, then we have to make sure that 1226 * the IP address and port number the application requested 1227 * (or we selected for the application) is not being used by 1228 * another stream. If another stream is already using the 1229 * requested IP address and port, the behavior depends on 1230 * "bind_to_req_port_only". If set the bind fails; otherwise we 1231 * search for any an unused port to bind to the the stream. 1232 * 1233 * As per the BSD semantics, as modified by the Deering multicast 1234 * changes, if udp_reuseaddr is set, then we allow multiple binds 1235 * to the same port independent of the local IP address. 1236 * 1237 * This is slightly different than in SunOS 4.X which did not 1238 * support IP multicast. Note that the change implemented by the 1239 * Deering multicast code effects all binds - not only binding 1240 * to IP multicast addresses. 1241 * 1242 * Note that when binding to port zero we ignore SO_REUSEADDR in 1243 * order to guarantee a unique port. 
1244 */ 1245 1246 count = 0; 1247 if (udp->udp_anon_priv_bind) { 1248 /* 1249 * loopmax = (IPPORT_RESERVED-1) - 1250 * us->us_min_anonpriv_port + 1 1251 */ 1252 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 1253 } else { 1254 loopmax = us->us_largest_anon_port - 1255 us->us_smallest_anon_port + 1; 1256 } 1257 1258 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 1259 zoneid = connp->conn_zoneid; 1260 1261 for (;;) { 1262 udp_t *udp1; 1263 boolean_t found_exclbind = B_FALSE; 1264 1265 /* 1266 * Walk through the list of udp streams bound to 1267 * requested port with the same IP address. 1268 */ 1269 lport = htons(port); 1270 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 1271 us->us_bind_fanout_size)]; 1272 mutex_enter(&udpf->uf_lock); 1273 for (udp1 = udpf->uf_udp; udp1 != NULL; 1274 udp1 = udp1->udp_bind_hash) { 1275 if (lport != udp1->udp_port) 1276 continue; 1277 1278 /* 1279 * On a labeled system, we must treat bindings to ports 1280 * on shared IP addresses by sockets with MAC exemption 1281 * privilege as being in all zones, as there's 1282 * otherwise no way to identify the right receiver. 1283 */ 1284 if (zoneid != udp1->udp_connp->conn_zoneid && 1285 !udp->udp_mac_exempt && !udp1->udp_mac_exempt) 1286 continue; 1287 1288 /* 1289 * If UDP_EXCLBIND is set for either the bound or 1290 * binding endpoint, the semantics of bind 1291 * is changed according to the following chart. 1292 * 1293 * spec = specified address (v4 or v6) 1294 * unspec = unspecified address (v4 or v6) 1295 * A = specified addresses are different for endpoints 1296 * 1297 * bound bind to allowed? 1298 * ------------------------------------- 1299 * unspec unspec no 1300 * unspec spec no 1301 * spec unspec no 1302 * spec spec yes if A 1303 * 1304 * For labeled systems, SO_MAC_EXEMPT behaves the same 1305 * as UDP_EXCLBIND, except that zoneid is ignored. 
1306 */ 1307 if (udp1->udp_exclbind || udp->udp_exclbind || 1308 udp1->udp_mac_exempt || udp->udp_mac_exempt) { 1309 if (V6_OR_V4_INADDR_ANY( 1310 udp1->udp_bound_v6src) || 1311 is_inaddr_any || 1312 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1313 &v6src)) { 1314 found_exclbind = B_TRUE; 1315 break; 1316 } 1317 continue; 1318 } 1319 1320 /* 1321 * Check ipversion to allow IPv4 and IPv6 sockets to 1322 * have disjoint port number spaces. 1323 */ 1324 if (udp->udp_ipversion != udp1->udp_ipversion) { 1325 1326 /* 1327 * On the first time through the loop, if the 1328 * the user intentionally specified a 1329 * particular port number, then ignore any 1330 * bindings of the other protocol that may 1331 * conflict. This allows the user to bind IPv6 1332 * alone and get both v4 and v6, or bind both 1333 * both and get each seperately. On subsequent 1334 * times through the loop, we're checking a 1335 * port that we chose (not the user) and thus 1336 * we do not allow casual duplicate bindings. 1337 */ 1338 if (count == 0 && requested_port != 0) 1339 continue; 1340 } 1341 1342 /* 1343 * No difference depending on SO_REUSEADDR. 1344 * 1345 * If existing port is bound to a 1346 * non-wildcard IP address and 1347 * the requesting stream is bound to 1348 * a distinct different IP addresses 1349 * (non-wildcard, also), keep going. 1350 */ 1351 if (!is_inaddr_any && 1352 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 1353 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1354 &v6src)) { 1355 continue; 1356 } 1357 break; 1358 } 1359 1360 if (!found_exclbind && 1361 (udp->udp_reuseaddr && requested_port != 0)) { 1362 break; 1363 } 1364 1365 if (udp1 == NULL) { 1366 /* 1367 * No other stream has this IP address 1368 * and port number. We can use it. 1369 */ 1370 break; 1371 } 1372 mutex_exit(&udpf->uf_lock); 1373 if (bind_to_req_port_only) { 1374 /* 1375 * We get here only when requested port 1376 * is bound (and only first of the for() 1377 * loop iteration). 
1378 * 1379 * The semantics of this bind request 1380 * require it to fail so we return from 1381 * the routine (and exit the loop). 1382 * 1383 */ 1384 udp_err_ack(q, mp, TADDRBUSY, 0); 1385 return; 1386 } 1387 1388 if (udp->udp_anon_priv_bind) { 1389 port = udp_get_next_priv_port(udp); 1390 } else { 1391 if ((count == 0) && (requested_port != 0)) { 1392 /* 1393 * If the application wants us to find 1394 * a port, get one to start with. Set 1395 * requested_port to 0, so that we will 1396 * update us->us_next_port_to_try below. 1397 */ 1398 port = udp_update_next_port(udp, 1399 us->us_next_port_to_try, B_TRUE); 1400 requested_port = 0; 1401 } else { 1402 port = udp_update_next_port(udp, port + 1, 1403 B_FALSE); 1404 } 1405 } 1406 1407 if (port == 0 || ++count >= loopmax) { 1408 /* 1409 * We've tried every possible port number and 1410 * there are none available, so send an error 1411 * to the user. 1412 */ 1413 udp_err_ack(q, mp, TNOADDR, 0); 1414 return; 1415 } 1416 } 1417 1418 /* 1419 * Copy the source address into our udp structure. This address 1420 * may still be zero; if so, ip will fill in the correct address 1421 * each time an outbound packet is passed to it. 1422 * If we are binding to a broadcast or multicast address udp_rput 1423 * will clear the source address when it receives the T_BIND_ACK. 1424 */ 1425 udp->udp_v6src = udp->udp_bound_v6src = v6src; 1426 udp->udp_port = lport; 1427 /* 1428 * Now reset the the next anonymous port if the application requested 1429 * an anonymous port, or we handed out the next anonymous port. 1430 */ 1431 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 1432 us->us_next_port_to_try = port + 1; 1433 } 1434 1435 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. 
*/ 1436 if (udp->udp_family == AF_INET) { 1437 sin->sin_port = udp->udp_port; 1438 } else { 1439 int error; 1440 1441 sin6->sin6_port = udp->udp_port; 1442 /* Rebuild the header template */ 1443 error = udp_build_hdrs(q, udp); 1444 if (error != 0) { 1445 mutex_exit(&udpf->uf_lock); 1446 udp_err_ack(q, mp, TSYSERR, error); 1447 return; 1448 } 1449 } 1450 udp->udp_state = TS_IDLE; 1451 udp_bind_hash_insert(udpf, udp); 1452 mutex_exit(&udpf->uf_lock); 1453 1454 if (cl_inet_bind) { 1455 /* 1456 * Running in cluster mode - register bind information 1457 */ 1458 if (udp->udp_ipversion == IPV4_VERSION) { 1459 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1460 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1461 (in_port_t)udp->udp_port); 1462 } else { 1463 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1464 (uint8_t *)&(udp->udp_v6src), 1465 (in_port_t)udp->udp_port); 1466 } 1467 1468 } 1469 1470 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 1471 if (is_system_labeled() && (!connp->conn_anon_port || 1472 connp->conn_anon_mlp)) { 1473 uint16_t mlpport; 1474 cred_t *cr = connp->conn_cred; 1475 zone_t *zone; 1476 1477 zone = crgetzone(cr); 1478 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 1479 mlptSingle; 1480 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 1481 &v6src, udp->udp_us->us_netstack->netstack_ip); 1482 if (addrtype == mlptSingle) { 1483 udp_err_ack(q, mp, TNOADDR, 0); 1484 connp->conn_anon_port = B_FALSE; 1485 connp->conn_mlp_type = mlptSingle; 1486 return; 1487 } 1488 mlpport = connp->conn_anon_port ? 
PMAPPORT : port; 1489 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 1490 addrtype); 1491 if (mlptype != mlptSingle && 1492 (connp->conn_mlp_type == mlptSingle || 1493 secpolicy_net_bindmlp(cr) != 0)) { 1494 if (udp->udp_debug) { 1495 (void) strlog(UDP_MOD_ID, 0, 1, 1496 SL_ERROR|SL_TRACE, 1497 "udp_bind: no priv for multilevel port %d", 1498 mlpport); 1499 } 1500 udp_err_ack(q, mp, TACCES, 0); 1501 connp->conn_anon_port = B_FALSE; 1502 connp->conn_mlp_type = mlptSingle; 1503 return; 1504 } 1505 1506 /* 1507 * If we're specifically binding a shared IP address and the 1508 * port is MLP on shared addresses, then check to see if this 1509 * zone actually owns the MLP. Reject if not. 1510 */ 1511 if (mlptype == mlptShared && addrtype == mlptShared) { 1512 /* 1513 * No need to handle exclusive-stack zones since 1514 * ALL_ZONES only applies to the shared stack. 1515 */ 1516 zoneid_t mlpzone; 1517 1518 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 1519 htons(mlpport)); 1520 if (connp->conn_zoneid != mlpzone) { 1521 if (udp->udp_debug) { 1522 (void) strlog(UDP_MOD_ID, 0, 1, 1523 SL_ERROR|SL_TRACE, 1524 "udp_bind: attempt to bind port " 1525 "%d on shared addr in zone %d " 1526 "(should be %d)", 1527 mlpport, connp->conn_zoneid, 1528 mlpzone); 1529 } 1530 udp_err_ack(q, mp, TACCES, 0); 1531 connp->conn_anon_port = B_FALSE; 1532 connp->conn_mlp_type = mlptSingle; 1533 return; 1534 } 1535 } 1536 if (connp->conn_anon_port) { 1537 int error; 1538 1539 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 1540 port, B_TRUE); 1541 if (error != 0) { 1542 if (udp->udp_debug) { 1543 (void) strlog(UDP_MOD_ID, 0, 1, 1544 SL_ERROR|SL_TRACE, 1545 "udp_bind: cannot establish anon " 1546 "MLP for port %d", port); 1547 } 1548 udp_err_ack(q, mp, TACCES, 0); 1549 connp->conn_anon_port = B_FALSE; 1550 connp->conn_mlp_type = mlptSingle; 1551 return; 1552 } 1553 } 1554 connp->conn_mlp_type = mlptype; 1555 } 1556 1557 /* Pass the protocol number in the message following the address. 
*/ 1558 *mp->b_wptr++ = IPPROTO_UDP; 1559 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1560 /* 1561 * Append a request for an IRE if udp_v6src not 1562 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1563 */ 1564 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1565 if (!mp->b_cont) { 1566 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1567 return; 1568 } 1569 mp->b_cont->b_wptr += sizeof (ire_t); 1570 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1571 } 1572 if (udp->udp_family == AF_INET6) 1573 mp = ip_bind_v6(q, mp, connp, NULL); 1574 else 1575 mp = ip_bind_v4(q, mp, connp); 1576 1577 if (mp != NULL) 1578 udp_rput_other(_RD(q), mp); 1579 else 1580 CONN_INC_REF(connp); 1581 } 1582 1583 1584 void 1585 udp_resume_bind(conn_t *connp, mblk_t *mp) 1586 { 1587 udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY); 1588 } 1589 1590 /* 1591 * This is called from ip_wput_nondata to resume a deferred UDP bind. 1592 */ 1593 /* ARGSUSED */ 1594 static void 1595 udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2) 1596 { 1597 conn_t *connp = arg; 1598 1599 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1600 1601 udp_rput_other(connp->conn_rq, mp); 1602 1603 CONN_OPER_PENDING_DONE(connp); 1604 udp_exit(connp); 1605 } 1606 1607 /* 1608 * This routine handles each T_CONN_REQ message passed to udp. It 1609 * associates a default destination address with the stream. 1610 * 1611 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1612 * T_BIND_REQ - specifying local and remote address/port 1613 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 1614 * T_OK_ACK - for the T_CONN_REQ 1615 * T_CONN_CON - to keep the TPI user happy 1616 * 1617 * The connect completes in udp_rput. 1618 * When a T_BIND_ACK is received information is extracted from the IRE 1619 * and the two appended messages are sent to the TPI user. 1620 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1621 * it to an error ack for the appropriate primitive. 
*/
static void
udp_connect(queue_t *q, mblk_t *mp)
{
	sin6_t	*sin6;
	sin_t	*sin;
	struct T_conn_req	*tcr;
	in6_addr_t v6dst;
	ipaddr_t v4dst;
	uint16_t dstport;
	uint32_t flowinfo;
	mblk_t	*mp1, *mp2;
	udp_fanout_t	*udpf;
	udp_t	*udp, *udp1;
	udp_stack_t	*us;

	udp = Q_TO_UDP(q);

	/* Only a cast here; tcr is not dereferenced before the length check. */
	tcr = (struct T_conn_req *)mp->b_rptr;
	us = udp->udp_us;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	/*
	 * This UDP must have bound to a port already before doing
	 * a connect.
	 */
	if (udp->udp_state == TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);

	/* Bind hash bucket of our local port; its lock guards udp_state. */
	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
	    us->us_bind_fanout_size)];

	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		mutex_enter(&udpf->uf_lock);
		udp->udp_v6src = udp->udp_bound_v6src;
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
	}

	/* No options are accepted on a T_CONN_REQ. */
	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine packet type based on type of address passed in
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that address family matches the type of
	 * family of the address passed down
	 */
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v6dst = sin6->sin6_addr;
		/* A v4-mapped destination turns this into an IPv4 connect. */
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			flowinfo = 0;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
			flowinfo = sin6->sin6_flowinfo;
		}
		dstport = sin6->sin6_port;
		break;
	}
	/* Connecting to port zero is not allowed. */
	if (dstport == 0) {
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	/*
	 * Create a default IP header with no IP options.
	 */
	udp->udp_dstport = dstport;
	if (udp->udp_ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (udp->udp_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = 0;

		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * use the address of that interface as our
		 * source address if no source address has been set.
		 */
		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
		    CLASSD(v4dst) &&
		    udp->udp_multicast_if_addr != INADDR_ANY) {
			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
			    &udp->udp_v6src);
		}
	} else {
		ASSERT(udp->udp_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = flowinfo;
		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * then the ip bind logic will pick the correct source
		 * address (i.e. matching the outgoing multicast interface).
		 */
	}

	/*
	 * Verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER
	 */
	mutex_enter(&udpf->uf_lock);
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;
		if (udp->udp_port != udp1->udp_port ||
		    udp->udp_ipversion != udp1->udp_ipversion ||
		    dstport != udp1->udp_dstport ||
		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst))
			continue;
		/* Another connected endpoint already has this 4-tuple. */
		mutex_exit(&udpf->uf_lock);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}
	udp->udp_state = TS_DATA_XFER;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to verify that there is a route
	 * and to determine the source address.
	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		/*
		 * Shared failure exit, also reached by goto from the
		 * allocation failures below: fall back to TS_IDLE.
		 */
bind_failed:
		mutex_enter(&udpf->uf_lock);
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
		return;
	}

	/*
	 * We also have to send a connection confirmation to
	 * keep TLI happy. Prepare it for udp_rput.
	 */
	if (udp->udp_family == AF_INET)
		mp2 = mi_tpi_conn_con(NULL, (char *)sin,
		    sizeof (*sin), NULL, 0);
	else
		mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
		    sizeof (*sin6), NULL, 0);
	if (mp2 == NULL) {
		freemsg(mp1);
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack. */
		freemsg(mp2);
		/* mp1 is recycled to carry the error ack instead. */
		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
	linkb(mp1, mp);
	linkb(mp1, mp2);

	mblk_setcred(mp1, udp->udp_connp->conn_cred);
	if (udp->udp_family == AF_INET)
		mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
	else
		mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);

	/*
	 * A non-NULL return is processed right away; on NULL a reference
	 * is taken on the conn.
	 * NOTE(review): NULL presumably means IP deferred the bind and will
	 * finish it via udp_resume_bind() - confirm against ip_bind_v4/v6.
	 */
	if (mp1 != NULL)
		udp_rput_other(_RD(q), mp1);
	else
		CONN_INC_REF(udp->udp_connp);
}

/*
 * Close routine for a UDP stream.
 *
 * Quiesces the conn, drains the receive list, turns the queue's put and
 * service procedures off, restores the IP queue water marks, frees the
 * udp_t and finally turns the conn_t back into a plain IP endpoint.
 * Always returns 0.
 */
static int
udp_close(queue_t *q)
{
	conn_t	*connp = Q_TO_CONN(UDP_WR(q));
	udp_t	*udp;
	queue_t	*ip_rq = RD(UDP_WR(q));

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	ip_quiesce_conn(connp);
	/*
	 * Disable read-side synchronous stream
	 * interface and drain any queued data.
	 */
	udp_rcv_drain(q, udp, B_TRUE);
	ASSERT(!udp->udp_direct_sockfs);

	qprocsoff(q);

	/* restore IP module's high and low water marks to default values */
	ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat;

	/* The drain above must have left the receive list empty. */
	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	ASSERT(udp->udp_rcv_list_tail == NULL);

	/* udp is freed here and must not be touched afterwards. */
	udp_close_free(connp);

	/*
	 * Restore connp as an IP endpoint.
	 * Locking required to prevent a race with udp_snmp_get()/
	 * ipcl_get_next_conn(), which selects conn_t which are
	 * IPCL_UDP and not CONN_CONDEMNED.
	 */
	mutex_enter(&connp->conn_lock);
	connp->conn_flags &= ~IPCL_UDP;
	connp->conn_state_flags &=
	    ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED);
	connp->conn_ulp_labeled = B_FALSE;
	mutex_exit(&connp->conn_lock);

	return (0);
}

/*
 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn
 *
 * Registers the unbind with the cluster framework when applicable, takes
 * the endpoint out of the bind hash and then blocks on conn_cv until no
 * reader or squeue references remain and the endpoint is back in
 * UDP_MT_HOT mode.
 */
void
udp_quiesce_conn(conn_t *connp)
{
	udp_t	*udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)(&(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		}
	}

	/* No-op when the endpoint is not on a hash chain. */
	udp_bind_hash_remove(udp, B_FALSE);

	mutex_enter(&connp->conn_lock);
	while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 ||
	    udp->udp_mode != UDP_MT_HOT) {
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	}
	mutex_exit(&connp->conn_lock);
}

/*
 * Free everything hanging off the udp_t of connp and then the udp_t
 * itself: send/receive IP options, the sticky header buffer, the sticky
 * ip6 packet options and the netstack hold.  On return connp->conn_udp is
 * NULL.
 */
void
udp_close_free(conn_t *connp)
{
	udp_t *udp = connp->conn_udp;

	/* If there are any options associated with the stream, free them. */
	if (udp->udp_ip_snd_options) {
		mi_free((char *)udp->udp_ip_snd_options);
		udp->udp_ip_snd_options = NULL;
	}

	if (udp->udp_ip_rcv_options) {
		mi_free((char *)udp->udp_ip_rcv_options);
		udp->udp_ip_rcv_options = NULL;
	}

	/* Free memory associated with sticky options */
	if (udp->udp_sticky_hdrs_len != 0) {
		kmem_free(udp->udp_sticky_hdrs,
		    udp->udp_sticky_hdrs_len);
		udp->udp_sticky_hdrs = NULL;
		udp->udp_sticky_hdrs_len = 0;
	}

	ip6_pkt_free(&udp->udp_sticky_ipp);

	/* Break the conn <-> udp linkage in both directions before freeing. */
	udp->udp_connp = NULL;
	netstack_rele(udp->udp_us->us_netstack);

	connp->conn_udp = NULL;
	kmem_cache_free(udp_cache, udp);
}

/*
 * This routine handles each T_DISCON_REQ message passed to udp
 * as an indication that UDP is no longer connected. This results
 * in sending a T_BIND_REQ to IP to restore the binding to just
 * the local address/port.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying just the local address/port
 *	T_OK_ACK	- for the T_DISCON_REQ
 *
 * The disconnect completes in udp_rput.
 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
2001 */ 2002 static void 2003 udp_disconnect(queue_t *q, mblk_t *mp) 2004 { 2005 udp_t *udp = Q_TO_UDP(q); 2006 mblk_t *mp1; 2007 udp_fanout_t *udpf; 2008 udp_stack_t *us; 2009 2010 us = udp->udp_us; 2011 if (udp->udp_state != TS_DATA_XFER) { 2012 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2013 "udp_disconnect: bad state, %u", udp->udp_state); 2014 udp_err_ack(q, mp, TOUTSTATE, 0); 2015 return; 2016 } 2017 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 2018 us->us_bind_fanout_size)]; 2019 mutex_enter(&udpf->uf_lock); 2020 udp->udp_v6src = udp->udp_bound_v6src; 2021 udp->udp_state = TS_IDLE; 2022 mutex_exit(&udpf->uf_lock); 2023 2024 /* 2025 * Send down bind to IP to remove the full binding and revert 2026 * to the local address binding. 2027 */ 2028 if (udp->udp_family == AF_INET) 2029 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 2030 else 2031 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 2032 if (mp1 == NULL) { 2033 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2034 return; 2035 } 2036 mp = mi_tpi_ok_ack_alloc(mp); 2037 if (mp == NULL) { 2038 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 2039 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 2040 return; 2041 } 2042 2043 if (udp->udp_family == AF_INET6) { 2044 int error; 2045 2046 /* Rebuild the header template */ 2047 error = udp_build_hdrs(q, udp); 2048 if (error != 0) { 2049 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 2050 freemsg(mp1); 2051 return; 2052 } 2053 } 2054 mutex_enter(&udpf->uf_lock); 2055 udp->udp_discon_pending = 1; 2056 mutex_exit(&udpf->uf_lock); 2057 2058 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 2059 linkb(mp1, mp); 2060 2061 if (udp->udp_family == AF_INET6) 2062 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 2063 else 2064 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 2065 2066 if (mp1 != NULL) 2067 udp_rput_other(_RD(q), mp1); 2068 else 2069 CONN_INC_REF(udp->udp_connp); 2070 } 2071 2072 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 2073 static void 2074 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 2075 { 2076 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 2077 putnext(UDP_RD(q), mp); 2078 } 2079 2080 /* Shorthand to generate and send TPI error acks to our client */ 2081 static void 2082 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2083 int sys_error) 2084 { 2085 struct T_error_ack *teackp; 2086 2087 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2088 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2089 teackp = (struct T_error_ack *)mp->b_rptr; 2090 teackp->ERROR_prim = primitive; 2091 teackp->TLI_error = t_error; 2092 teackp->UNIX_error = sys_error; 2093 putnext(UDP_RD(q), mp); 2094 } 2095 } 2096 2097 /*ARGSUSED*/ 2098 static int 2099 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2100 { 2101 int i; 2102 udp_t *udp = Q_TO_UDP(q); 2103 udp_stack_t *us = udp->udp_us; 2104 2105 for (i = 0; i < us->us_num_epriv_ports; i++) { 2106 if (us->us_epriv_ports[i] != 0) 2107 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 
2108 } 2109 return (0); 2110 } 2111 2112 /* ARGSUSED */ 2113 static int 2114 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2115 cred_t *cr) 2116 { 2117 long new_value; 2118 int i; 2119 udp_t *udp = Q_TO_UDP(q); 2120 udp_stack_t *us = udp->udp_us; 2121 2122 /* 2123 * Fail the request if the new value does not lie within the 2124 * port number limits. 2125 */ 2126 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2127 new_value <= 0 || new_value >= 65536) { 2128 return (EINVAL); 2129 } 2130 2131 /* Check if the value is already in the list */ 2132 for (i = 0; i < us->us_num_epriv_ports; i++) { 2133 if (new_value == us->us_epriv_ports[i]) { 2134 return (EEXIST); 2135 } 2136 } 2137 /* Find an empty slot */ 2138 for (i = 0; i < us->us_num_epriv_ports; i++) { 2139 if (us->us_epriv_ports[i] == 0) 2140 break; 2141 } 2142 if (i == us->us_num_epriv_ports) { 2143 return (EOVERFLOW); 2144 } 2145 2146 /* Set the new value */ 2147 us->us_epriv_ports[i] = (in_port_t)new_value; 2148 return (0); 2149 } 2150 2151 /* ARGSUSED */ 2152 static int 2153 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2154 cred_t *cr) 2155 { 2156 long new_value; 2157 int i; 2158 udp_t *udp = Q_TO_UDP(q); 2159 udp_stack_t *us = udp->udp_us; 2160 2161 /* 2162 * Fail the request if the new value does not lie within the 2163 * port number limits. 
2164 */ 2165 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2166 new_value <= 0 || new_value >= 65536) { 2167 return (EINVAL); 2168 } 2169 2170 /* Check that the value is already in the list */ 2171 for (i = 0; i < us->us_num_epriv_ports; i++) { 2172 if (us->us_epriv_ports[i] == new_value) 2173 break; 2174 } 2175 if (i == us->us_num_epriv_ports) { 2176 return (ESRCH); 2177 } 2178 2179 /* Clear the value */ 2180 us->us_epriv_ports[i] = 0; 2181 return (0); 2182 } 2183 2184 /* At minimum we need 4 bytes of UDP header */ 2185 #define ICMP_MIN_UDP_HDR 4 2186 2187 /* 2188 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2189 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2190 * Assumes that IP has pulled up everything up to and including the ICMP header. 2191 * An M_CTL could potentially come here from some other module (i.e. if UDP 2192 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2193 * does not have enough ICMP information , following STREAMS conventions, 2194 * we send it upstream assuming it is an M_CTL we don't understand. 2195 */ 2196 static void 2197 udp_icmp_error(queue_t *q, mblk_t *mp) 2198 { 2199 icmph_t *icmph; 2200 ipha_t *ipha; 2201 int iph_hdr_length; 2202 udpha_t *udpha; 2203 sin_t sin; 2204 sin6_t sin6; 2205 mblk_t *mp1; 2206 int error = 0; 2207 size_t mp_size = MBLKL(mp); 2208 udp_t *udp = Q_TO_UDP(q); 2209 2210 /* 2211 * Assume IP provides aligned packets - otherwise toss 2212 */ 2213 if (!OK_32PTR(mp->b_rptr)) { 2214 freemsg(mp); 2215 return; 2216 } 2217 2218 /* 2219 * Verify that we have a complete IP header and the application has 2220 * asked for errors. If not, send it upstream. 2221 */ 2222 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2223 noticmpv4: 2224 putnext(UDP_RD(q), mp); 2225 return; 2226 } 2227 2228 ipha = (ipha_t *)mp->b_rptr; 2229 /* 2230 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2231 * upstream. 
ICMPv6 is handled in udp_icmp_error_ipv6. 2232 */ 2233 switch (IPH_HDR_VERSION(ipha)) { 2234 case IPV6_VERSION: 2235 udp_icmp_error_ipv6(q, mp); 2236 return; 2237 case IPV4_VERSION: 2238 break; 2239 default: 2240 goto noticmpv4; 2241 } 2242 2243 /* Skip past the outer IP and ICMP headers */ 2244 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2245 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2246 /* 2247 * If we don't have the correct outer IP header length or if the ULP 2248 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2249 * send the packet upstream. 2250 */ 2251 if (iph_hdr_length < sizeof (ipha_t) || 2252 ipha->ipha_protocol != IPPROTO_ICMP || 2253 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2254 goto noticmpv4; 2255 } 2256 ipha = (ipha_t *)&icmph[1]; 2257 2258 /* Skip past the inner IP and find the ULP header */ 2259 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2260 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2261 /* 2262 * If we don't have the correct inner IP header length or if the ULP 2263 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2264 * bytes of UDP header, send it upstream. 2265 */ 2266 if (iph_hdr_length < sizeof (ipha_t) || 2267 ipha->ipha_protocol != IPPROTO_UDP || 2268 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2269 goto noticmpv4; 2270 } 2271 2272 switch (icmph->icmph_type) { 2273 case ICMP_DEST_UNREACHABLE: 2274 switch (icmph->icmph_code) { 2275 case ICMP_FRAGMENTATION_NEEDED: 2276 /* 2277 * IP has already adjusted the path MTU. 2278 * XXX Somehow pass MTU indication to application? 
2279 */ 2280 break; 2281 case ICMP_PORT_UNREACHABLE: 2282 case ICMP_PROTOCOL_UNREACHABLE: 2283 error = ECONNREFUSED; 2284 break; 2285 default: 2286 /* Transient errors */ 2287 break; 2288 } 2289 break; 2290 default: 2291 /* Transient errors */ 2292 break; 2293 } 2294 if (error == 0) { 2295 freemsg(mp); 2296 return; 2297 } 2298 2299 switch (udp->udp_family) { 2300 case AF_INET: 2301 sin = sin_null; 2302 sin.sin_family = AF_INET; 2303 sin.sin_addr.s_addr = ipha->ipha_dst; 2304 sin.sin_port = udpha->uha_dst_port; 2305 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2306 error); 2307 break; 2308 case AF_INET6: 2309 sin6 = sin6_null; 2310 sin6.sin6_family = AF_INET6; 2311 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2312 sin6.sin6_port = udpha->uha_dst_port; 2313 2314 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2315 NULL, 0, error); 2316 break; 2317 } 2318 if (mp1) 2319 putnext(UDP_RD(q), mp1); 2320 freemsg(mp); 2321 } 2322 2323 /* 2324 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2325 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2326 * Assumes that IP has pulled up all the extension headers as well as the 2327 * ICMPv6 header. 2328 * An M_CTL could potentially come here from some other module (i.e. if UDP 2329 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2330 * does not have enough ICMP information , following STREAMS conventions, 2331 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2332 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version 2333 * field (when cast to ipha_t in udp_icmp_error). 
2334 */ 2335 static void 2336 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2337 { 2338 icmp6_t *icmp6; 2339 ip6_t *ip6h, *outer_ip6h; 2340 uint16_t hdr_length; 2341 uint8_t *nexthdrp; 2342 udpha_t *udpha; 2343 sin6_t sin6; 2344 mblk_t *mp1; 2345 int error = 0; 2346 size_t mp_size = MBLKL(mp); 2347 udp_t *udp = Q_TO_UDP(q); 2348 2349 /* 2350 * Verify that we have a complete IP header. If not, send it upstream. 2351 */ 2352 if (mp_size < sizeof (ip6_t)) { 2353 noticmpv6: 2354 putnext(UDP_RD(q), mp); 2355 return; 2356 } 2357 2358 outer_ip6h = (ip6_t *)mp->b_rptr; 2359 /* 2360 * Verify this is an ICMPV6 packet, else send it upstream 2361 */ 2362 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2363 hdr_length = IPV6_HDR_LEN; 2364 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2365 &nexthdrp) || 2366 *nexthdrp != IPPROTO_ICMPV6) { 2367 goto noticmpv6; 2368 } 2369 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2370 ip6h = (ip6_t *)&icmp6[1]; 2371 /* 2372 * Verify we have a complete ICMP and inner IP header. 2373 */ 2374 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2375 goto noticmpv6; 2376 2377 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2378 goto noticmpv6; 2379 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2380 /* 2381 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2382 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2383 * packet upstream. 
2384 */ 2385 if ((*nexthdrp != IPPROTO_UDP) || 2386 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2387 goto noticmpv6; 2388 } 2389 2390 switch (icmp6->icmp6_type) { 2391 case ICMP6_DST_UNREACH: 2392 switch (icmp6->icmp6_code) { 2393 case ICMP6_DST_UNREACH_NOPORT: 2394 error = ECONNREFUSED; 2395 break; 2396 case ICMP6_DST_UNREACH_ADMIN: 2397 case ICMP6_DST_UNREACH_NOROUTE: 2398 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2399 case ICMP6_DST_UNREACH_ADDR: 2400 /* Transient errors */ 2401 break; 2402 default: 2403 break; 2404 } 2405 break; 2406 case ICMP6_PACKET_TOO_BIG: { 2407 struct T_unitdata_ind *tudi; 2408 struct T_opthdr *toh; 2409 size_t udi_size; 2410 mblk_t *newmp; 2411 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2412 sizeof (struct ip6_mtuinfo); 2413 sin6_t *sin6; 2414 struct ip6_mtuinfo *mtuinfo; 2415 2416 /* 2417 * If the application has requested to receive path mtu 2418 * information, send up an empty message containing an 2419 * IPV6_PATHMTU ancillary data item. 2420 */ 2421 if (!udp->udp_ipv6_recvpathmtu) 2422 break; 2423 2424 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2425 opt_length; 2426 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2427 BUMP_MIB(&udp->udp_mib, udpInErrors); 2428 break; 2429 } 2430 2431 /* 2432 * newmp->b_cont is left to NULL on purpose. This is an 2433 * empty message containing only ancillary data. 
2434 */ 2435 newmp->b_datap->db_type = M_PROTO; 2436 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2437 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2438 tudi->PRIM_type = T_UNITDATA_IND; 2439 tudi->SRC_length = sizeof (sin6_t); 2440 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2441 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2442 tudi->OPT_length = opt_length; 2443 2444 sin6 = (sin6_t *)&tudi[1]; 2445 bzero(sin6, sizeof (sin6_t)); 2446 sin6->sin6_family = AF_INET6; 2447 sin6->sin6_addr = udp->udp_v6dst; 2448 2449 toh = (struct T_opthdr *)&sin6[1]; 2450 toh->level = IPPROTO_IPV6; 2451 toh->name = IPV6_PATHMTU; 2452 toh->len = opt_length; 2453 toh->status = 0; 2454 2455 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2456 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2457 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2458 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2459 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2460 /* 2461 * We've consumed everything we need from the original 2462 * message. Free it, then send our empty message. 2463 */ 2464 freemsg(mp); 2465 putnext(UDP_RD(q), newmp); 2466 return; 2467 } 2468 case ICMP6_TIME_EXCEEDED: 2469 /* Transient errors */ 2470 break; 2471 case ICMP6_PARAM_PROB: 2472 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2473 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2474 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2475 (uchar_t *)nexthdrp) { 2476 error = ECONNREFUSED; 2477 break; 2478 } 2479 break; 2480 } 2481 if (error == 0) { 2482 freemsg(mp); 2483 return; 2484 } 2485 2486 sin6 = sin6_null; 2487 sin6.sin6_family = AF_INET6; 2488 sin6.sin6_addr = ip6h->ip6_dst; 2489 sin6.sin6_port = udpha->uha_dst_port; 2490 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2491 2492 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2493 error); 2494 if (mp1) 2495 putnext(UDP_RD(q), mp1); 2496 freemsg(mp); 2497 } 2498 2499 /* 2500 * This routine responds to T_ADDR_REQ messages. 
It is called by udp_wput. 2501 * The local address is filled in if endpoint is bound. The remote address 2502 * is filled in if remote address has been precified ("connected endpoint") 2503 * (The concept of connected CLTS sockets is alien to published TPI 2504 * but we support it anyway). 2505 */ 2506 static void 2507 udp_addr_req(queue_t *q, mblk_t *mp) 2508 { 2509 sin_t *sin; 2510 sin6_t *sin6; 2511 mblk_t *ackmp; 2512 struct T_addr_ack *taa; 2513 udp_t *udp = Q_TO_UDP(q); 2514 2515 /* Make it large enough for worst case */ 2516 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2517 2 * sizeof (sin6_t), 1); 2518 if (ackmp == NULL) { 2519 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2520 return; 2521 } 2522 taa = (struct T_addr_ack *)ackmp->b_rptr; 2523 2524 bzero(taa, sizeof (struct T_addr_ack)); 2525 ackmp->b_wptr = (uchar_t *)&taa[1]; 2526 2527 taa->PRIM_type = T_ADDR_ACK; 2528 ackmp->b_datap->db_type = M_PCPROTO; 2529 /* 2530 * Note: Following code assumes 32 bit alignment of basic 2531 * data structures like sin_t and struct T_addr_ack. 2532 */ 2533 if (udp->udp_state != TS_UNBND) { 2534 /* 2535 * Fill in local address first 2536 */ 2537 taa->LOCADDR_offset = sizeof (*taa); 2538 if (udp->udp_family == AF_INET) { 2539 taa->LOCADDR_length = sizeof (sin_t); 2540 sin = (sin_t *)&taa[1]; 2541 /* Fill zeroes and then initialize non-zero fields */ 2542 *sin = sin_null; 2543 sin->sin_family = AF_INET; 2544 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2545 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2546 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2547 sin->sin_addr.s_addr); 2548 } else { 2549 /* 2550 * INADDR_ANY 2551 * udp_v6src is not set, we might be bound to 2552 * broadcast/multicast. 
Use udp_bound_v6src as 2553 * local address instead (that could 2554 * also still be INADDR_ANY) 2555 */ 2556 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2557 sin->sin_addr.s_addr); 2558 } 2559 sin->sin_port = udp->udp_port; 2560 ackmp->b_wptr = (uchar_t *)&sin[1]; 2561 if (udp->udp_state == TS_DATA_XFER) { 2562 /* 2563 * connected, fill remote address too 2564 */ 2565 taa->REMADDR_length = sizeof (sin_t); 2566 /* assumed 32-bit alignment */ 2567 taa->REMADDR_offset = taa->LOCADDR_offset + 2568 taa->LOCADDR_length; 2569 2570 sin = (sin_t *)(ackmp->b_rptr + 2571 taa->REMADDR_offset); 2572 /* initialize */ 2573 *sin = sin_null; 2574 sin->sin_family = AF_INET; 2575 sin->sin_addr.s_addr = 2576 V4_PART_OF_V6(udp->udp_v6dst); 2577 sin->sin_port = udp->udp_dstport; 2578 ackmp->b_wptr = (uchar_t *)&sin[1]; 2579 } 2580 } else { 2581 taa->LOCADDR_length = sizeof (sin6_t); 2582 sin6 = (sin6_t *)&taa[1]; 2583 /* Fill zeroes and then initialize non-zero fields */ 2584 *sin6 = sin6_null; 2585 sin6->sin6_family = AF_INET6; 2586 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2587 sin6->sin6_addr = udp->udp_v6src; 2588 } else { 2589 /* 2590 * UNSPECIFIED 2591 * udp_v6src is not set, we might be bound to 2592 * broadcast/multicast. 
Use udp_bound_v6src as 2593 * local address instead (that could 2594 * also still be UNSPECIFIED) 2595 */ 2596 sin6->sin6_addr = 2597 udp->udp_bound_v6src; 2598 } 2599 sin6->sin6_port = udp->udp_port; 2600 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2601 if (udp->udp_state == TS_DATA_XFER) { 2602 /* 2603 * connected, fill remote address too 2604 */ 2605 taa->REMADDR_length = sizeof (sin6_t); 2606 /* assumed 32-bit alignment */ 2607 taa->REMADDR_offset = taa->LOCADDR_offset + 2608 taa->LOCADDR_length; 2609 2610 sin6 = (sin6_t *)(ackmp->b_rptr + 2611 taa->REMADDR_offset); 2612 /* initialize */ 2613 *sin6 = sin6_null; 2614 sin6->sin6_family = AF_INET6; 2615 sin6->sin6_addr = udp->udp_v6dst; 2616 sin6->sin6_port = udp->udp_dstport; 2617 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2618 } 2619 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2620 } 2621 } 2622 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2623 putnext(UDP_RD(q), ackmp); 2624 } 2625 2626 static void 2627 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2628 { 2629 if (udp->udp_family == AF_INET) { 2630 *tap = udp_g_t_info_ack_ipv4; 2631 } else { 2632 *tap = udp_g_t_info_ack_ipv6; 2633 } 2634 tap->CURRENT_state = udp->udp_state; 2635 tap->OPT_size = udp_max_optsize; 2636 } 2637 2638 /* 2639 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2640 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2641 * udp_g_t_info_ack. The current state of the stream is copied from 2642 * udp_state. 
2643 */ 2644 static void 2645 udp_capability_req(queue_t *q, mblk_t *mp) 2646 { 2647 t_uscalar_t cap_bits1; 2648 struct T_capability_ack *tcap; 2649 udp_t *udp = Q_TO_UDP(q); 2650 2651 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2652 2653 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2654 mp->b_datap->db_type, T_CAPABILITY_ACK); 2655 if (!mp) 2656 return; 2657 2658 tcap = (struct T_capability_ack *)mp->b_rptr; 2659 tcap->CAP_bits1 = 0; 2660 2661 if (cap_bits1 & TC1_INFO) { 2662 udp_copy_info(&tcap->INFO_ack, udp); 2663 tcap->CAP_bits1 |= TC1_INFO; 2664 } 2665 2666 putnext(UDP_RD(q), mp); 2667 } 2668 2669 /* 2670 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2671 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2672 * The current state of the stream is copied from udp_state. 2673 */ 2674 static void 2675 udp_info_req(queue_t *q, mblk_t *mp) 2676 { 2677 udp_t *udp = Q_TO_UDP(q); 2678 2679 /* Create a T_INFO_ACK message. */ 2680 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2681 T_INFO_ACK); 2682 if (!mp) 2683 return; 2684 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2685 putnext(UDP_RD(q), mp); 2686 } 2687 2688 /* 2689 * IP recognizes seven kinds of bind requests: 2690 * 2691 * - A zero-length address binds only to the protocol number. 2692 * 2693 * - A 4-byte address is treated as a request to 2694 * validate that the address is a valid local IPv4 2695 * address, appropriate for an application to bind to. 2696 * IP does the verification, but does not make any note 2697 * of the address at this time. 2698 * 2699 * - A 16-byte address contains is treated as a request 2700 * to validate a local IPv6 address, as the 4-byte 2701 * address case above. 2702 * 2703 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2704 * use it for the inbound fanout of packets. 
2705 * 2706 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2707 * use it for the inbound fanout of packets. 2708 * 2709 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2710 * information consisting of local and remote addresses 2711 * and ports. In this case, the addresses are both 2712 * validated as appropriate for this operation, and, if 2713 * so, the information is retained for use in the 2714 * inbound fanout. 2715 * 2716 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2717 * fanout information, like the 12-byte case above. 2718 * 2719 * IP will also fill in the IRE request mblk with information 2720 * regarding our peer. In all cases, we notify IP of our protocol 2721 * type by appending a single protocol byte to the bind request. 2722 */ 2723 static mblk_t * 2724 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2725 { 2726 char *cp; 2727 mblk_t *mp; 2728 struct T_bind_req *tbr; 2729 ipa_conn_t *ac; 2730 ipa6_conn_t *ac6; 2731 sin_t *sin; 2732 sin6_t *sin6; 2733 2734 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2735 2736 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2737 if (!mp) 2738 return (mp); 2739 mp->b_datap->db_type = M_PROTO; 2740 tbr = (struct T_bind_req *)mp->b_rptr; 2741 tbr->PRIM_type = bind_prim; 2742 tbr->ADDR_offset = sizeof (*tbr); 2743 tbr->CONIND_number = 0; 2744 tbr->ADDR_length = addr_length; 2745 cp = (char *)&tbr[1]; 2746 switch (addr_length) { 2747 case sizeof (ipa_conn_t): 2748 ASSERT(udp->udp_family == AF_INET); 2749 /* Append a request for an IRE */ 2750 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2751 if (!mp->b_cont) { 2752 freemsg(mp); 2753 return (NULL); 2754 } 2755 mp->b_cont->b_wptr += sizeof (ire_t); 2756 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2757 2758 /* cp known to be 32 bit aligned */ 2759 ac = (ipa_conn_t *)cp; 2760 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2761 ac->ac_faddr = 
V4_PART_OF_V6(udp->udp_v6dst); 2762 ac->ac_fport = udp->udp_dstport; 2763 ac->ac_lport = udp->udp_port; 2764 break; 2765 2766 case sizeof (ipa6_conn_t): 2767 ASSERT(udp->udp_family == AF_INET6); 2768 /* Append a request for an IRE */ 2769 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2770 if (!mp->b_cont) { 2771 freemsg(mp); 2772 return (NULL); 2773 } 2774 mp->b_cont->b_wptr += sizeof (ire_t); 2775 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2776 2777 /* cp known to be 32 bit aligned */ 2778 ac6 = (ipa6_conn_t *)cp; 2779 ac6->ac6_laddr = udp->udp_v6src; 2780 ac6->ac6_faddr = udp->udp_v6dst; 2781 ac6->ac6_fport = udp->udp_dstport; 2782 ac6->ac6_lport = udp->udp_port; 2783 break; 2784 2785 case sizeof (sin_t): 2786 ASSERT(udp->udp_family == AF_INET); 2787 /* Append a request for an IRE */ 2788 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2789 if (!mp->b_cont) { 2790 freemsg(mp); 2791 return (NULL); 2792 } 2793 mp->b_cont->b_wptr += sizeof (ire_t); 2794 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2795 2796 sin = (sin_t *)cp; 2797 *sin = sin_null; 2798 sin->sin_family = AF_INET; 2799 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2800 sin->sin_port = udp->udp_port; 2801 break; 2802 2803 case sizeof (sin6_t): 2804 ASSERT(udp->udp_family == AF_INET6); 2805 /* Append a request for an IRE */ 2806 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2807 if (!mp->b_cont) { 2808 freemsg(mp); 2809 return (NULL); 2810 } 2811 mp->b_cont->b_wptr += sizeof (ire_t); 2812 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2813 2814 sin6 = (sin6_t *)cp; 2815 *sin6 = sin6_null; 2816 sin6->sin6_family = AF_INET6; 2817 sin6->sin6_addr = udp->udp_bound_v6src; 2818 sin6->sin6_port = udp->udp_port; 2819 break; 2820 } 2821 /* Add protocol number to end */ 2822 cp[addr_length] = (char)IPPROTO_UDP; 2823 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2824 return (mp); 2825 } 2826 2827 /* 2828 * This is the open routine for udp. 
It allocates a udp_t structure for 2829 * the stream and, on the first open of the module, creates an ND table. 2830 */ 2831 /* ARGSUSED */ 2832 static int 2833 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2834 { 2835 int err; 2836 udp_t *udp; 2837 conn_t *connp; 2838 queue_t *ip_wq; 2839 zoneid_t zoneid; 2840 netstack_t *ns; 2841 udp_stack_t *us; 2842 2843 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2844 2845 /* If the stream is already open, return immediately. */ 2846 if (q->q_ptr != NULL) 2847 return (0); 2848 2849 /* If this is not a push of udp as a module, fail. */ 2850 if (sflag != MODOPEN) 2851 return (EINVAL); 2852 2853 ns = netstack_find_by_cred(credp); 2854 ASSERT(ns != NULL); 2855 us = ns->netstack_udp; 2856 ASSERT(us != NULL); 2857 2858 /* 2859 * For exclusive stacks we set the zoneid to zero 2860 * to make UDP operate as if in the global zone. 2861 */ 2862 if (us->us_netstack->netstack_stackid != GLOBAL_NETSTACKID) 2863 zoneid = GLOBAL_ZONEID; 2864 else 2865 zoneid = crgetzoneid(credp); 2866 2867 q->q_hiwat = us->us_recv_hiwat; 2868 WR(q)->q_hiwat = us->us_xmit_hiwat; 2869 WR(q)->q_lowat = us->us_xmit_lowat; 2870 2871 /* Insert ourselves in the stream since we're about to walk q_next */ 2872 qprocson(q); 2873 2874 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2875 bzero(udp, sizeof (*udp)); 2876 2877 /* 2878 * UDP is supported only as a module and it has to be pushed directly 2879 * above the device instance of IP. If UDP is pushed anywhere else 2880 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2881 * sake of MIB browsers and fail everything else. 
2882 */ 2883 ip_wq = WR(q)->q_next; 2884 if (NOT_OVER_IP(ip_wq)) { 2885 /* Support just SNMP for MIB browsers */ 2886 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP, 2887 us->us_netstack); 2888 connp->conn_rq = q; 2889 connp->conn_wq = WR(q); 2890 connp->conn_flags |= IPCL_UDPMOD; 2891 connp->conn_cred = credp; 2892 connp->conn_zoneid = zoneid; 2893 connp->conn_udp = udp; 2894 udp->udp_us = us; 2895 udp->udp_connp = connp; 2896 q->q_ptr = WR(q)->q_ptr = connp; 2897 crhold(credp); 2898 q->q_qinfo = &udp_snmp_rinit; 2899 WR(q)->q_qinfo = &udp_snmp_winit; 2900 return (0); 2901 } 2902 2903 /* 2904 * Initialize the udp_t structure for this stream. 2905 */ 2906 q = RD(ip_wq); 2907 connp = Q_TO_CONN(q); 2908 mutex_enter(&connp->conn_lock); 2909 connp->conn_proto = IPPROTO_UDP; 2910 connp->conn_flags |= IPCL_UDP; 2911 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2912 connp->conn_udp = udp; 2913 2914 /* Set the initial state of the stream and the privilege status. */ 2915 udp->udp_connp = connp; 2916 udp->udp_state = TS_UNBND; 2917 udp->udp_mode = UDP_MT_HOT; 2918 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2919 udp->udp_family = AF_INET6; 2920 udp->udp_ipversion = IPV6_VERSION; 2921 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2922 udp->udp_ttl = us->us_ipv6_hoplimit; 2923 connp->conn_af_isv6 = B_TRUE; 2924 connp->conn_flags |= IPCL_ISV6; 2925 } else { 2926 udp->udp_family = AF_INET; 2927 udp->udp_ipversion = IPV4_VERSION; 2928 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2929 udp->udp_ttl = us->us_ipv4_ttl; 2930 connp->conn_af_isv6 = B_FALSE; 2931 connp->conn_flags &= ~IPCL_ISV6; 2932 } 2933 2934 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2935 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2936 connp->conn_zoneid = zoneid; 2937 2938 udp->udp_open_time = lbolt64; 2939 udp->udp_open_pid = curproc->p_pid; 2940 2941 /* 2942 * If the caller has the process-wide flag set, then default to MAC 2943 * exempt mode. 
This allows read-down to unlabeled hosts. 2944 */ 2945 if (getpflags(NET_MAC_AWARE, credp) != 0) 2946 udp->udp_mac_exempt = B_TRUE; 2947 2948 if (connp->conn_flags & IPCL_SOCKET) { 2949 udp->udp_issocket = B_TRUE; 2950 udp->udp_direct_sockfs = B_TRUE; 2951 } 2952 2953 connp->conn_ulp_labeled = is_system_labeled(); 2954 2955 mutex_exit(&connp->conn_lock); 2956 udp->udp_us = us; 2957 2958 /* 2959 * The transmit hiwat/lowat is only looked at on IP's queue. 2960 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2961 * getsockopts. 2962 */ 2963 q->q_hiwat = us->us_recv_hiwat; 2964 WR(q)->q_hiwat = us->us_xmit_hiwat; 2965 WR(q)->q_lowat = us->us_xmit_lowat; 2966 2967 if (udp->udp_family == AF_INET6) { 2968 /* Build initial header template for transmit */ 2969 if ((err = udp_build_hdrs(q, udp)) != 0) { 2970 /* XXX missing free of connp? crfree? netstack_rele? */ 2971 qprocsoff(UDP_RD(q)); 2972 udp->udp_connp = NULL; 2973 connp->conn_udp = NULL; 2974 kmem_cache_free(udp_cache, udp); 2975 return (err); 2976 } 2977 } 2978 2979 /* Set the Stream head write offset and high watermark. */ 2980 (void) mi_set_sth_wroff(UDP_RD(q), 2981 udp->udp_max_hdr_len + us->us_wroff_extra); 2982 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 2983 2984 return (0); 2985 } 2986 2987 /* 2988 * Which UDP options OK to set through T_UNITDATA_REQ... 
2989 */ 2990 /* ARGSUSED */ 2991 static boolean_t 2992 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2993 { 2994 return (B_TRUE); 2995 } 2996 2997 /* 2998 * This routine gets default values of certain options whose default 2999 * values are maintained by protcol specific code 3000 */ 3001 /* ARGSUSED */ 3002 int 3003 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 3004 { 3005 udp_t *udp = Q_TO_UDP(q); 3006 udp_stack_t *us = udp->udp_us; 3007 int *i1 = (int *)ptr; 3008 3009 switch (level) { 3010 case IPPROTO_IP: 3011 switch (name) { 3012 case IP_MULTICAST_TTL: 3013 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 3014 return (sizeof (uchar_t)); 3015 case IP_MULTICAST_LOOP: 3016 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 3017 return (sizeof (uchar_t)); 3018 } 3019 break; 3020 case IPPROTO_IPV6: 3021 switch (name) { 3022 case IPV6_MULTICAST_HOPS: 3023 *i1 = IP_DEFAULT_MULTICAST_TTL; 3024 return (sizeof (int)); 3025 case IPV6_MULTICAST_LOOP: 3026 *i1 = IP_DEFAULT_MULTICAST_LOOP; 3027 return (sizeof (int)); 3028 case IPV6_UNICAST_HOPS: 3029 *i1 = us->us_ipv6_hoplimit; 3030 return (sizeof (int)); 3031 } 3032 break; 3033 } 3034 return (-1); 3035 } 3036 3037 /* 3038 * This routine retrieves the current status of socket options 3039 * and expects the caller to pass in the queue pointer of the 3040 * upper instance. It returns the size of the option retrieved. 
3041 */ 3042 int 3043 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 3044 { 3045 int *i1 = (int *)ptr; 3046 conn_t *connp; 3047 udp_t *udp; 3048 ip6_pkt_t *ipp; 3049 int len; 3050 udp_stack_t *us; 3051 3052 q = UDP_WR(q); 3053 connp = Q_TO_CONN(q); 3054 udp = connp->conn_udp; 3055 ipp = &udp->udp_sticky_ipp; 3056 us = udp->udp_us; 3057 3058 switch (level) { 3059 case SOL_SOCKET: 3060 switch (name) { 3061 case SO_DEBUG: 3062 *i1 = udp->udp_debug; 3063 break; /* goto sizeof (int) option return */ 3064 case SO_REUSEADDR: 3065 *i1 = udp->udp_reuseaddr; 3066 break; /* goto sizeof (int) option return */ 3067 case SO_TYPE: 3068 *i1 = SOCK_DGRAM; 3069 break; /* goto sizeof (int) option return */ 3070 3071 /* 3072 * The following three items are available here, 3073 * but are only meaningful to IP. 3074 */ 3075 case SO_DONTROUTE: 3076 *i1 = udp->udp_dontroute; 3077 break; /* goto sizeof (int) option return */ 3078 case SO_USELOOPBACK: 3079 *i1 = udp->udp_useloopback; 3080 break; /* goto sizeof (int) option return */ 3081 case SO_BROADCAST: 3082 *i1 = udp->udp_broadcast; 3083 break; /* goto sizeof (int) option return */ 3084 3085 case SO_SNDBUF: 3086 *i1 = q->q_hiwat; 3087 break; /* goto sizeof (int) option return */ 3088 case SO_RCVBUF: 3089 *i1 = RD(q)->q_hiwat; 3090 break; /* goto sizeof (int) option return */ 3091 case SO_DGRAM_ERRIND: 3092 *i1 = udp->udp_dgram_errind; 3093 break; /* goto sizeof (int) option return */ 3094 case SO_RECVUCRED: 3095 *i1 = udp->udp_recvucred; 3096 break; /* goto sizeof (int) option return */ 3097 case SO_TIMESTAMP: 3098 *i1 = udp->udp_timestamp; 3099 break; /* goto sizeof (int) option return */ 3100 case SO_ANON_MLP: 3101 *i1 = udp->udp_anon_mlp; 3102 break; /* goto sizeof (int) option return */ 3103 case SO_MAC_EXEMPT: 3104 *i1 = udp->udp_mac_exempt; 3105 break; /* goto sizeof (int) option return */ 3106 case SO_ALLZONES: 3107 *i1 = connp->conn_allzones; 3108 break; /* goto sizeof (int) option return */ 3109 case 
SO_EXCLBIND: 3110 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 3111 break; 3112 case SO_PROTOTYPE: 3113 *i1 = IPPROTO_UDP; 3114 break; 3115 case SO_DOMAIN: 3116 *i1 = udp->udp_family; 3117 break; 3118 default: 3119 return (-1); 3120 } 3121 break; 3122 case IPPROTO_IP: 3123 if (udp->udp_family != AF_INET) 3124 return (-1); 3125 switch (name) { 3126 case IP_OPTIONS: 3127 case T_IP_OPTIONS: 3128 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 3129 if (len > 0) { 3130 bcopy(udp->udp_ip_rcv_options + 3131 udp->udp_label_len, ptr, len); 3132 } 3133 return (len); 3134 case IP_TOS: 3135 case T_IP_TOS: 3136 *i1 = (int)udp->udp_type_of_service; 3137 break; /* goto sizeof (int) option return */ 3138 case IP_TTL: 3139 *i1 = (int)udp->udp_ttl; 3140 break; /* goto sizeof (int) option return */ 3141 case IP_NEXTHOP: 3142 case IP_RECVPKTINFO: 3143 /* 3144 * This also handles IP_PKTINFO. 3145 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 3146 * Differentiation is based on the size of the argument 3147 * passed in. 3148 * This option is handled in IP which will return an 3149 * error for IP_PKTINFO as it's not supported as a 3150 * sticky option. 
3151 */ 3152 return (-EINVAL); 3153 case IP_MULTICAST_IF: 3154 /* 0 address if not set */ 3155 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 3156 return (sizeof (ipaddr_t)); 3157 case IP_MULTICAST_TTL: 3158 *(uchar_t *)ptr = udp->udp_multicast_ttl; 3159 return (sizeof (uchar_t)); 3160 case IP_MULTICAST_LOOP: 3161 *ptr = connp->conn_multicast_loop; 3162 return (sizeof (uint8_t)); 3163 case IP_RECVOPTS: 3164 *i1 = udp->udp_recvopts; 3165 break; /* goto sizeof (int) option return */ 3166 case IP_RECVDSTADDR: 3167 *i1 = udp->udp_recvdstaddr; 3168 break; /* goto sizeof (int) option return */ 3169 case IP_RECVIF: 3170 *i1 = udp->udp_recvif; 3171 break; /* goto sizeof (int) option return */ 3172 case IP_RECVSLLA: 3173 *i1 = udp->udp_recvslla; 3174 break; /* goto sizeof (int) option return */ 3175 case IP_RECVTTL: 3176 *i1 = udp->udp_recvttl; 3177 break; /* goto sizeof (int) option return */ 3178 case IP_ADD_MEMBERSHIP: 3179 case IP_DROP_MEMBERSHIP: 3180 case IP_BLOCK_SOURCE: 3181 case IP_UNBLOCK_SOURCE: 3182 case IP_ADD_SOURCE_MEMBERSHIP: 3183 case IP_DROP_SOURCE_MEMBERSHIP: 3184 case MCAST_JOIN_GROUP: 3185 case MCAST_LEAVE_GROUP: 3186 case MCAST_BLOCK_SOURCE: 3187 case MCAST_UNBLOCK_SOURCE: 3188 case MCAST_JOIN_SOURCE_GROUP: 3189 case MCAST_LEAVE_SOURCE_GROUP: 3190 case IP_DONTFAILOVER_IF: 3191 /* cannot "get" the value for these */ 3192 return (-1); 3193 case IP_BOUND_IF: 3194 /* Zero if not set */ 3195 *i1 = udp->udp_bound_if; 3196 break; /* goto sizeof (int) option return */ 3197 case IP_UNSPEC_SRC: 3198 *i1 = udp->udp_unspec_source; 3199 break; /* goto sizeof (int) option return */ 3200 case IP_XMIT_IF: 3201 *i1 = udp->udp_xmit_if; 3202 break; /* goto sizeof (int) option return */ 3203 default: 3204 return (-1); 3205 } 3206 break; 3207 case IPPROTO_IPV6: 3208 if (udp->udp_family != AF_INET6) 3209 return (-1); 3210 switch (name) { 3211 case IPV6_UNICAST_HOPS: 3212 *i1 = (unsigned int)udp->udp_ttl; 3213 break; /* goto sizeof (int) option return */ 3214 case 
IPV6_MULTICAST_IF: 3215 /* 0 index if not set */ 3216 *i1 = udp->udp_multicast_if_index; 3217 break; /* goto sizeof (int) option return */ 3218 case IPV6_MULTICAST_HOPS: 3219 *i1 = udp->udp_multicast_ttl; 3220 break; /* goto sizeof (int) option return */ 3221 case IPV6_MULTICAST_LOOP: 3222 *i1 = connp->conn_multicast_loop; 3223 break; /* goto sizeof (int) option return */ 3224 case IPV6_JOIN_GROUP: 3225 case IPV6_LEAVE_GROUP: 3226 case MCAST_JOIN_GROUP: 3227 case MCAST_LEAVE_GROUP: 3228 case MCAST_BLOCK_SOURCE: 3229 case MCAST_UNBLOCK_SOURCE: 3230 case MCAST_JOIN_SOURCE_GROUP: 3231 case MCAST_LEAVE_SOURCE_GROUP: 3232 /* cannot "get" the value for these */ 3233 return (-1); 3234 case IPV6_BOUND_IF: 3235 /* Zero if not set */ 3236 *i1 = udp->udp_bound_if; 3237 break; /* goto sizeof (int) option return */ 3238 case IPV6_UNSPEC_SRC: 3239 *i1 = udp->udp_unspec_source; 3240 break; /* goto sizeof (int) option return */ 3241 case IPV6_RECVPKTINFO: 3242 *i1 = udp->udp_ip_recvpktinfo; 3243 break; /* goto sizeof (int) option return */ 3244 case IPV6_RECVTCLASS: 3245 *i1 = udp->udp_ipv6_recvtclass; 3246 break; /* goto sizeof (int) option return */ 3247 case IPV6_RECVPATHMTU: 3248 *i1 = udp->udp_ipv6_recvpathmtu; 3249 break; /* goto sizeof (int) option return */ 3250 case IPV6_RECVHOPLIMIT: 3251 *i1 = udp->udp_ipv6_recvhoplimit; 3252 break; /* goto sizeof (int) option return */ 3253 case IPV6_RECVHOPOPTS: 3254 *i1 = udp->udp_ipv6_recvhopopts; 3255 break; /* goto sizeof (int) option return */ 3256 case IPV6_RECVDSTOPTS: 3257 *i1 = udp->udp_ipv6_recvdstopts; 3258 break; /* goto sizeof (int) option return */ 3259 case _OLD_IPV6_RECVDSTOPTS: 3260 *i1 = udp->udp_old_ipv6_recvdstopts; 3261 break; /* goto sizeof (int) option return */ 3262 case IPV6_RECVRTHDRDSTOPTS: 3263 *i1 = udp->udp_ipv6_recvrthdrdstopts; 3264 break; /* goto sizeof (int) option return */ 3265 case IPV6_RECVRTHDR: 3266 *i1 = udp->udp_ipv6_recvrthdr; 3267 break; /* goto sizeof (int) option return */ 3268 case 
IPV6_PKTINFO: { 3269 /* XXX assumes that caller has room for max size! */ 3270 struct in6_pktinfo *pkti; 3271 3272 pkti = (struct in6_pktinfo *)ptr; 3273 if (ipp->ipp_fields & IPPF_IFINDEX) 3274 pkti->ipi6_ifindex = ipp->ipp_ifindex; 3275 else 3276 pkti->ipi6_ifindex = 0; 3277 if (ipp->ipp_fields & IPPF_ADDR) 3278 pkti->ipi6_addr = ipp->ipp_addr; 3279 else 3280 pkti->ipi6_addr = ipv6_all_zeros; 3281 return (sizeof (struct in6_pktinfo)); 3282 } 3283 case IPV6_TCLASS: 3284 if (ipp->ipp_fields & IPPF_TCLASS) 3285 *i1 = ipp->ipp_tclass; 3286 else 3287 *i1 = IPV6_FLOW_TCLASS( 3288 IPV6_DEFAULT_VERS_AND_FLOW); 3289 break; /* goto sizeof (int) option return */ 3290 case IPV6_NEXTHOP: { 3291 sin6_t *sin6 = (sin6_t *)ptr; 3292 3293 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 3294 return (0); 3295 *sin6 = sin6_null; 3296 sin6->sin6_family = AF_INET6; 3297 sin6->sin6_addr = ipp->ipp_nexthop; 3298 return (sizeof (sin6_t)); 3299 } 3300 case IPV6_HOPOPTS: 3301 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 3302 return (0); 3303 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 3304 return (0); 3305 /* 3306 * The cipso/label option is added by kernel. 3307 * User is not usually aware of this option. 3308 * We copy out the hbh opt after the label option. 
3309 */ 3310 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 3311 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 3312 if (udp->udp_label_len_v6 > 0) { 3313 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 3314 ptr[1] = (ipp->ipp_hopoptslen - 3315 udp->udp_label_len_v6 + 7) / 8 - 1; 3316 } 3317 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 3318 case IPV6_RTHDRDSTOPTS: 3319 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 3320 return (0); 3321 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 3322 return (ipp->ipp_rtdstoptslen); 3323 case IPV6_RTHDR: 3324 if (!(ipp->ipp_fields & IPPF_RTHDR)) 3325 return (0); 3326 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 3327 return (ipp->ipp_rthdrlen); 3328 case IPV6_DSTOPTS: 3329 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 3330 return (0); 3331 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 3332 return (ipp->ipp_dstoptslen); 3333 case IPV6_PATHMTU: 3334 return (ip_fill_mtuinfo(&udp->udp_v6dst, 3335 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 3336 us->us_netstack)); 3337 default: 3338 return (-1); 3339 } 3340 break; 3341 case IPPROTO_UDP: 3342 switch (name) { 3343 case UDP_ANONPRIVBIND: 3344 *i1 = udp->udp_anon_priv_bind; 3345 break; 3346 case UDP_EXCLBIND: 3347 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 3348 break; 3349 case UDP_RCVHDR: 3350 *i1 = udp->udp_rcvhdr ? 1 : 0; 3351 break; 3352 default: 3353 return (-1); 3354 } 3355 break; 3356 default: 3357 return (-1); 3358 } 3359 return (sizeof (int)); 3360 } 3361 3362 /* 3363 * This routine sets socket options; it expects the caller 3364 * to pass in the queue pointer of the upper instance. 3365 */ 3366 /* ARGSUSED */ 3367 int 3368 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3369 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3370 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3371 { 3372 udpattrs_t *attrs = thisdg_attrs; 3373 int *i1 = (int *)invalp; 3374 boolean_t onoff = (*i1 == 0) ? 
0 : 1; 3375 boolean_t checkonly; 3376 int error; 3377 conn_t *connp; 3378 udp_t *udp; 3379 uint_t newlen; 3380 udp_stack_t *us; 3381 3382 q = UDP_WR(q); 3383 connp = Q_TO_CONN(q); 3384 udp = connp->conn_udp; 3385 us = udp->udp_us; 3386 3387 switch (optset_context) { 3388 case SETFN_OPTCOM_CHECKONLY: 3389 checkonly = B_TRUE; 3390 /* 3391 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3392 * inlen != 0 implies value supplied and 3393 * we have to "pretend" to set it. 3394 * inlen == 0 implies that there is no 3395 * value part in T_CHECK request and just validation 3396 * done elsewhere should be enough, we just return here. 3397 */ 3398 if (inlen == 0) { 3399 *outlenp = 0; 3400 return (0); 3401 } 3402 break; 3403 case SETFN_OPTCOM_NEGOTIATE: 3404 checkonly = B_FALSE; 3405 break; 3406 case SETFN_UD_NEGOTIATE: 3407 case SETFN_CONN_NEGOTIATE: 3408 checkonly = B_FALSE; 3409 /* 3410 * Negotiating local and "association-related" options 3411 * through T_UNITDATA_REQ. 3412 * 3413 * Following routine can filter out ones we do not 3414 * want to be "set" this way. 3415 */ 3416 if (!udp_opt_allow_udr_set(level, name)) { 3417 *outlenp = 0; 3418 return (EINVAL); 3419 } 3420 break; 3421 default: 3422 /* 3423 * We should never get here 3424 */ 3425 *outlenp = 0; 3426 return (EINVAL); 3427 } 3428 3429 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3430 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3431 3432 /* 3433 * For fixed length options, no sanity check 3434 * of passed in length is done. It is assumed *_optcom_req() 3435 * routines do the right thing. 3436 */ 3437 3438 switch (level) { 3439 case SOL_SOCKET: 3440 switch (name) { 3441 case SO_REUSEADDR: 3442 if (!checkonly) 3443 udp->udp_reuseaddr = onoff; 3444 break; 3445 case SO_DEBUG: 3446 if (!checkonly) 3447 udp->udp_debug = onoff; 3448 break; 3449 /* 3450 * The following three items are available here, 3451 * but are only meaningful to IP. 
3452 */ 3453 case SO_DONTROUTE: 3454 if (!checkonly) 3455 udp->udp_dontroute = onoff; 3456 break; 3457 case SO_USELOOPBACK: 3458 if (!checkonly) 3459 udp->udp_useloopback = onoff; 3460 break; 3461 case SO_BROADCAST: 3462 if (!checkonly) 3463 udp->udp_broadcast = onoff; 3464 break; 3465 3466 case SO_SNDBUF: 3467 if (*i1 > us->us_max_buf) { 3468 *outlenp = 0; 3469 return (ENOBUFS); 3470 } 3471 if (!checkonly) { 3472 q->q_hiwat = *i1; 3473 WR(UDP_RD(q))->q_hiwat = *i1; 3474 } 3475 break; 3476 case SO_RCVBUF: 3477 if (*i1 > us->us_max_buf) { 3478 *outlenp = 0; 3479 return (ENOBUFS); 3480 } 3481 if (!checkonly) { 3482 RD(q)->q_hiwat = *i1; 3483 UDP_RD(q)->q_hiwat = *i1; 3484 (void) mi_set_sth_hiwat(UDP_RD(q), 3485 udp_set_rcv_hiwat(udp, *i1)); 3486 } 3487 break; 3488 case SO_DGRAM_ERRIND: 3489 if (!checkonly) 3490 udp->udp_dgram_errind = onoff; 3491 break; 3492 case SO_RECVUCRED: 3493 if (!checkonly) 3494 udp->udp_recvucred = onoff; 3495 break; 3496 case SO_ALLZONES: 3497 /* 3498 * "soft" error (negative) 3499 * option not handled at this level 3500 * Do not modify *outlenp. 3501 */ 3502 return (-EINVAL); 3503 case SO_TIMESTAMP: 3504 if (!checkonly) 3505 udp->udp_timestamp = onoff; 3506 break; 3507 case SO_ANON_MLP: 3508 if (!checkonly) 3509 udp->udp_anon_mlp = onoff; 3510 break; 3511 case SO_MAC_EXEMPT: 3512 if (secpolicy_net_mac_aware(cr) != 0 || 3513 udp->udp_state != TS_UNBND) 3514 return (EACCES); 3515 if (!checkonly) 3516 udp->udp_mac_exempt = onoff; 3517 break; 3518 case SCM_UCRED: { 3519 struct ucred_s *ucr; 3520 cred_t *cr, *newcr; 3521 ts_label_t *tsl; 3522 3523 /* 3524 * Only sockets that have proper privileges and are 3525 * bound to MLPs will have any other value here, so 3526 * this implicitly tests for privilege to set label. 
3527 */ 3528 if (connp->conn_mlp_type == mlptSingle) 3529 break; 3530 ucr = (struct ucred_s *)invalp; 3531 if (inlen != ucredsize || 3532 ucr->uc_labeloff < sizeof (*ucr) || 3533 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 3534 return (EINVAL); 3535 if (!checkonly) { 3536 mblk_t *mb; 3537 3538 if (attrs == NULL || 3539 (mb = attrs->udpattr_mb) == NULL) 3540 return (EINVAL); 3541 if ((cr = DB_CRED(mb)) == NULL) 3542 cr = udp->udp_connp->conn_cred; 3543 ASSERT(cr != NULL); 3544 if ((tsl = crgetlabel(cr)) == NULL) 3545 return (EINVAL); 3546 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 3547 tsl->tsl_doi, KM_NOSLEEP); 3548 if (newcr == NULL) 3549 return (ENOSR); 3550 mblk_setcred(mb, newcr); 3551 attrs->udpattr_credset = B_TRUE; 3552 crfree(newcr); 3553 } 3554 break; 3555 } 3556 case SO_EXCLBIND: 3557 if (!checkonly) 3558 udp->udp_exclbind = onoff; 3559 break; 3560 default: 3561 *outlenp = 0; 3562 return (EINVAL); 3563 } 3564 break; 3565 case IPPROTO_IP: 3566 if (udp->udp_family != AF_INET) { 3567 *outlenp = 0; 3568 return (ENOPROTOOPT); 3569 } 3570 switch (name) { 3571 case IP_OPTIONS: 3572 case T_IP_OPTIONS: 3573 /* Save options for use by IP. 
*/ 3574 newlen = inlen + udp->udp_label_len; 3575 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 3576 *outlenp = 0; 3577 return (EINVAL); 3578 } 3579 if (checkonly) 3580 break; 3581 3582 if (!tsol_option_set(&udp->udp_ip_snd_options, 3583 &udp->udp_ip_snd_options_len, 3584 udp->udp_label_len, invalp, inlen)) { 3585 *outlenp = 0; 3586 return (ENOMEM); 3587 } 3588 3589 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3590 UDPH_SIZE + udp->udp_ip_snd_options_len; 3591 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3592 us->us_wroff_extra); 3593 break; 3594 3595 case IP_TTL: 3596 if (!checkonly) { 3597 udp->udp_ttl = (uchar_t)*i1; 3598 } 3599 break; 3600 case IP_TOS: 3601 case T_IP_TOS: 3602 if (!checkonly) { 3603 udp->udp_type_of_service = (uchar_t)*i1; 3604 } 3605 break; 3606 case IP_MULTICAST_IF: { 3607 /* 3608 * TODO should check OPTMGMT reply and undo this if 3609 * there is an error. 3610 */ 3611 struct in_addr *inap = (struct in_addr *)invalp; 3612 if (!checkonly) { 3613 udp->udp_multicast_if_addr = 3614 inap->s_addr; 3615 } 3616 break; 3617 } 3618 case IP_MULTICAST_TTL: 3619 if (!checkonly) 3620 udp->udp_multicast_ttl = *invalp; 3621 break; 3622 case IP_MULTICAST_LOOP: 3623 if (!checkonly) 3624 connp->conn_multicast_loop = *invalp; 3625 break; 3626 case IP_RECVOPTS: 3627 if (!checkonly) 3628 udp->udp_recvopts = onoff; 3629 break; 3630 case IP_RECVDSTADDR: 3631 if (!checkonly) 3632 udp->udp_recvdstaddr = onoff; 3633 break; 3634 case IP_RECVIF: 3635 if (!checkonly) 3636 udp->udp_recvif = onoff; 3637 break; 3638 case IP_RECVSLLA: 3639 if (!checkonly) 3640 udp->udp_recvslla = onoff; 3641 break; 3642 case IP_RECVTTL: 3643 if (!checkonly) 3644 udp->udp_recvttl = onoff; 3645 break; 3646 case IP_PKTINFO: { 3647 /* 3648 * This also handles IP_RECVPKTINFO. 3649 * IP_PKTINFO and IP_RECVPKTINFO have same value. 3650 * Differentiation is based on the size of the 3651 * argument passed in. 
3652 */ 3653 struct in_pktinfo *pktinfop; 3654 ip4_pkt_t *attr_pktinfop; 3655 3656 if (checkonly) 3657 break; 3658 3659 if (inlen == sizeof (int)) { 3660 /* 3661 * This is IP_RECVPKTINFO option. 3662 * Keep a local copy of whether this option is 3663 * set or not and pass it down to IP for 3664 * processing. 3665 */ 3666 3667 udp->udp_ip_recvpktinfo = onoff; 3668 return (-EINVAL); 3669 } 3670 3671 if (attrs == NULL || 3672 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 3673 /* 3674 * sticky option or no buffer to return 3675 * the results. 3676 */ 3677 return (EINVAL); 3678 } 3679 3680 if (inlen != sizeof (struct in_pktinfo)) 3681 return (EINVAL); 3682 3683 pktinfop = (struct in_pktinfo *)invalp; 3684 3685 /* 3686 * At least one of the values should be specified 3687 */ 3688 if (pktinfop->ipi_ifindex == 0 && 3689 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 3690 return (EINVAL); 3691 } 3692 3693 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 3694 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 3695 3696 break; 3697 } 3698 case IP_ADD_MEMBERSHIP: 3699 case IP_DROP_MEMBERSHIP: 3700 case IP_BLOCK_SOURCE: 3701 case IP_UNBLOCK_SOURCE: 3702 case IP_ADD_SOURCE_MEMBERSHIP: 3703 case IP_DROP_SOURCE_MEMBERSHIP: 3704 case MCAST_JOIN_GROUP: 3705 case MCAST_LEAVE_GROUP: 3706 case MCAST_BLOCK_SOURCE: 3707 case MCAST_UNBLOCK_SOURCE: 3708 case MCAST_JOIN_SOURCE_GROUP: 3709 case MCAST_LEAVE_SOURCE_GROUP: 3710 case IP_SEC_OPT: 3711 case IP_NEXTHOP: 3712 /* 3713 * "soft" error (negative) 3714 * option not handled at this level 3715 * Do not modify *outlenp. 
3716 */ 3717 return (-EINVAL); 3718 case IP_BOUND_IF: 3719 if (!checkonly) 3720 udp->udp_bound_if = *i1; 3721 break; 3722 case IP_UNSPEC_SRC: 3723 if (!checkonly) 3724 udp->udp_unspec_source = onoff; 3725 break; 3726 case IP_XMIT_IF: 3727 if (!checkonly) 3728 udp->udp_xmit_if = *i1; 3729 break; 3730 default: 3731 *outlenp = 0; 3732 return (EINVAL); 3733 } 3734 break; 3735 case IPPROTO_IPV6: { 3736 ip6_pkt_t *ipp; 3737 boolean_t sticky; 3738 3739 if (udp->udp_family != AF_INET6) { 3740 *outlenp = 0; 3741 return (ENOPROTOOPT); 3742 } 3743 /* 3744 * Deal with both sticky options and ancillary data 3745 */ 3746 sticky = B_FALSE; 3747 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 3748 NULL) { 3749 /* sticky options, or none */ 3750 ipp = &udp->udp_sticky_ipp; 3751 sticky = B_TRUE; 3752 } 3753 3754 switch (name) { 3755 case IPV6_MULTICAST_IF: 3756 if (!checkonly) 3757 udp->udp_multicast_if_index = *i1; 3758 break; 3759 case IPV6_UNICAST_HOPS: 3760 /* -1 means use default */ 3761 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3762 *outlenp = 0; 3763 return (EINVAL); 3764 } 3765 if (!checkonly) { 3766 if (*i1 == -1) { 3767 udp->udp_ttl = ipp->ipp_unicast_hops = 3768 us->us_ipv6_hoplimit; 3769 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3770 /* Pass modified value to IP. */ 3771 *i1 = udp->udp_ttl; 3772 } else { 3773 udp->udp_ttl = ipp->ipp_unicast_hops = 3774 (uint8_t)*i1; 3775 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3776 } 3777 /* Rebuild the header template */ 3778 error = udp_build_hdrs(q, udp); 3779 if (error != 0) { 3780 *outlenp = 0; 3781 return (error); 3782 } 3783 } 3784 break; 3785 case IPV6_MULTICAST_HOPS: 3786 /* -1 means use default */ 3787 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3788 *outlenp = 0; 3789 return (EINVAL); 3790 } 3791 if (!checkonly) { 3792 if (*i1 == -1) { 3793 udp->udp_multicast_ttl = 3794 ipp->ipp_multicast_hops = 3795 IP_DEFAULT_MULTICAST_TTL; 3796 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3797 /* Pass modified value to IP. 
*/ 3798 *i1 = udp->udp_multicast_ttl; 3799 } else { 3800 udp->udp_multicast_ttl = 3801 ipp->ipp_multicast_hops = 3802 (uint8_t)*i1; 3803 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3804 } 3805 } 3806 break; 3807 case IPV6_MULTICAST_LOOP: 3808 if (*i1 != 0 && *i1 != 1) { 3809 *outlenp = 0; 3810 return (EINVAL); 3811 } 3812 if (!checkonly) 3813 connp->conn_multicast_loop = *i1; 3814 break; 3815 case IPV6_JOIN_GROUP: 3816 case IPV6_LEAVE_GROUP: 3817 case MCAST_JOIN_GROUP: 3818 case MCAST_LEAVE_GROUP: 3819 case MCAST_BLOCK_SOURCE: 3820 case MCAST_UNBLOCK_SOURCE: 3821 case MCAST_JOIN_SOURCE_GROUP: 3822 case MCAST_LEAVE_SOURCE_GROUP: 3823 /* 3824 * "soft" error (negative) 3825 * option not handled at this level 3826 * Note: Do not modify *outlenp 3827 */ 3828 return (-EINVAL); 3829 case IPV6_BOUND_IF: 3830 if (!checkonly) 3831 udp->udp_bound_if = *i1; 3832 break; 3833 case IPV6_UNSPEC_SRC: 3834 if (!checkonly) 3835 udp->udp_unspec_source = onoff; 3836 break; 3837 /* 3838 * Set boolean switches for ancillary data delivery 3839 */ 3840 case IPV6_RECVPKTINFO: 3841 if (!checkonly) 3842 udp->udp_ip_recvpktinfo = onoff; 3843 break; 3844 case IPV6_RECVTCLASS: 3845 if (!checkonly) { 3846 udp->udp_ipv6_recvtclass = onoff; 3847 } 3848 break; 3849 case IPV6_RECVPATHMTU: 3850 if (!checkonly) { 3851 udp->udp_ipv6_recvpathmtu = onoff; 3852 } 3853 break; 3854 case IPV6_RECVHOPLIMIT: 3855 if (!checkonly) 3856 udp->udp_ipv6_recvhoplimit = onoff; 3857 break; 3858 case IPV6_RECVHOPOPTS: 3859 if (!checkonly) 3860 udp->udp_ipv6_recvhopopts = onoff; 3861 break; 3862 case IPV6_RECVDSTOPTS: 3863 if (!checkonly) 3864 udp->udp_ipv6_recvdstopts = onoff; 3865 break; 3866 case _OLD_IPV6_RECVDSTOPTS: 3867 if (!checkonly) 3868 udp->udp_old_ipv6_recvdstopts = onoff; 3869 break; 3870 case IPV6_RECVRTHDRDSTOPTS: 3871 if (!checkonly) 3872 udp->udp_ipv6_recvrthdrdstopts = onoff; 3873 break; 3874 case IPV6_RECVRTHDR: 3875 if (!checkonly) 3876 udp->udp_ipv6_recvrthdr = onoff; 3877 break; 3878 /* 3879 * Set 
sticky options or ancillary data. 3880 * If sticky options, (re)build any extension headers 3881 * that might be needed as a result. 3882 */ 3883 case IPV6_PKTINFO: 3884 /* 3885 * The source address and ifindex are verified 3886 * in ip_opt_set(). For ancillary data the 3887 * source address is checked in ip_wput_v6. 3888 */ 3889 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3890 return (EINVAL); 3891 if (checkonly) 3892 break; 3893 3894 if (inlen == 0) { 3895 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3896 ipp->ipp_sticky_ignored |= 3897 (IPPF_IFINDEX|IPPF_ADDR); 3898 } else { 3899 struct in6_pktinfo *pkti; 3900 3901 pkti = (struct in6_pktinfo *)invalp; 3902 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3903 ipp->ipp_addr = pkti->ipi6_addr; 3904 if (ipp->ipp_ifindex != 0) 3905 ipp->ipp_fields |= IPPF_IFINDEX; 3906 else 3907 ipp->ipp_fields &= ~IPPF_IFINDEX; 3908 if (!IN6_IS_ADDR_UNSPECIFIED( 3909 &ipp->ipp_addr)) 3910 ipp->ipp_fields |= IPPF_ADDR; 3911 else 3912 ipp->ipp_fields &= ~IPPF_ADDR; 3913 } 3914 if (sticky) { 3915 error = udp_build_hdrs(q, udp); 3916 if (error != 0) 3917 return (error); 3918 } 3919 break; 3920 case IPV6_HOPLIMIT: 3921 if (sticky) 3922 return (EINVAL); 3923 if (inlen != 0 && inlen != sizeof (int)) 3924 return (EINVAL); 3925 if (checkonly) 3926 break; 3927 3928 if (inlen == 0) { 3929 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3930 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3931 } else { 3932 if (*i1 > 255 || *i1 < -1) 3933 return (EINVAL); 3934 if (*i1 == -1) 3935 ipp->ipp_hoplimit = 3936 us->us_ipv6_hoplimit; 3937 else 3938 ipp->ipp_hoplimit = *i1; 3939 ipp->ipp_fields |= IPPF_HOPLIMIT; 3940 } 3941 break; 3942 case IPV6_TCLASS: 3943 if (inlen != 0 && inlen != sizeof (int)) 3944 return (EINVAL); 3945 if (checkonly) 3946 break; 3947 3948 if (inlen == 0) { 3949 ipp->ipp_fields &= ~IPPF_TCLASS; 3950 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3951 } else { 3952 if (*i1 > 255 || *i1 < -1) 3953 return (EINVAL); 3954 if (*i1 == -1) 3955 ipp->ipp_tclass 
= 0; 3956 else 3957 ipp->ipp_tclass = *i1; 3958 ipp->ipp_fields |= IPPF_TCLASS; 3959 } 3960 if (sticky) { 3961 error = udp_build_hdrs(q, udp); 3962 if (error != 0) 3963 return (error); 3964 } 3965 break; 3966 case IPV6_NEXTHOP: 3967 /* 3968 * IP will verify that the nexthop is reachable 3969 * and fail for sticky options. 3970 */ 3971 if (inlen != 0 && inlen != sizeof (sin6_t)) 3972 return (EINVAL); 3973 if (checkonly) 3974 break; 3975 3976 if (inlen == 0) { 3977 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3978 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3979 } else { 3980 sin6_t *sin6 = (sin6_t *)invalp; 3981 3982 if (sin6->sin6_family != AF_INET6) 3983 return (EAFNOSUPPORT); 3984 if (IN6_IS_ADDR_V4MAPPED( 3985 &sin6->sin6_addr)) 3986 return (EADDRNOTAVAIL); 3987 ipp->ipp_nexthop = sin6->sin6_addr; 3988 if (!IN6_IS_ADDR_UNSPECIFIED( 3989 &ipp->ipp_nexthop)) 3990 ipp->ipp_fields |= IPPF_NEXTHOP; 3991 else 3992 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3993 } 3994 if (sticky) { 3995 error = udp_build_hdrs(q, udp); 3996 if (error != 0) 3997 return (error); 3998 } 3999 break; 4000 case IPV6_HOPOPTS: { 4001 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 4002 /* 4003 * Sanity checks - minimum size, size a multiple of 4004 * eight bytes, and matching size passed in. 4005 */ 4006 if (inlen != 0 && 4007 inlen != (8 * (hopts->ip6h_len + 1))) 4008 return (EINVAL); 4009 4010 if (checkonly) 4011 break; 4012 4013 error = optcom_pkt_set(invalp, inlen, sticky, 4014 (uchar_t **)&ipp->ipp_hopopts, 4015 &ipp->ipp_hopoptslen, 4016 sticky ? 
udp->udp_label_len_v6 : 0); 4017 if (error != 0) 4018 return (error); 4019 if (ipp->ipp_hopoptslen == 0) { 4020 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4021 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 4022 } else { 4023 ipp->ipp_fields |= IPPF_HOPOPTS; 4024 } 4025 if (sticky) { 4026 error = udp_build_hdrs(q, udp); 4027 if (error != 0) 4028 return (error); 4029 } 4030 break; 4031 } 4032 case IPV6_RTHDRDSTOPTS: { 4033 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 4034 4035 /* 4036 * Sanity checks - minimum size, size a multiple of 4037 * eight bytes, and matching size passed in. 4038 */ 4039 if (inlen != 0 && 4040 inlen != (8 * (dopts->ip6d_len + 1))) 4041 return (EINVAL); 4042 4043 if (checkonly) 4044 break; 4045 4046 if (inlen == 0) { 4047 if (sticky && 4048 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 4049 kmem_free(ipp->ipp_rtdstopts, 4050 ipp->ipp_rtdstoptslen); 4051 ipp->ipp_rtdstopts = NULL; 4052 ipp->ipp_rtdstoptslen = 0; 4053 } 4054 4055 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 4056 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 4057 } else { 4058 error = optcom_pkt_set(invalp, inlen, sticky, 4059 (uchar_t **)&ipp->ipp_rtdstopts, 4060 &ipp->ipp_rtdstoptslen, 0); 4061 if (error != 0) 4062 return (error); 4063 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4064 } 4065 if (sticky) { 4066 error = udp_build_hdrs(q, udp); 4067 if (error != 0) 4068 return (error); 4069 } 4070 break; 4071 } 4072 case IPV6_DSTOPTS: { 4073 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 4074 4075 /* 4076 * Sanity checks - minimum size, size a multiple of 4077 * eight bytes, and matching size passed in. 
4078 */ 4079 if (inlen != 0 && 4080 inlen != (8 * (dopts->ip6d_len + 1))) 4081 return (EINVAL); 4082 4083 if (checkonly) 4084 break; 4085 4086 if (inlen == 0) { 4087 if (sticky && 4088 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 4089 kmem_free(ipp->ipp_dstopts, 4090 ipp->ipp_dstoptslen); 4091 ipp->ipp_dstopts = NULL; 4092 ipp->ipp_dstoptslen = 0; 4093 } 4094 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4095 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 4096 } else { 4097 error = optcom_pkt_set(invalp, inlen, sticky, 4098 (uchar_t **)&ipp->ipp_dstopts, 4099 &ipp->ipp_dstoptslen, 0); 4100 if (error != 0) 4101 return (error); 4102 ipp->ipp_fields |= IPPF_DSTOPTS; 4103 } 4104 if (sticky) { 4105 error = udp_build_hdrs(q, udp); 4106 if (error != 0) 4107 return (error); 4108 } 4109 break; 4110 } 4111 case IPV6_RTHDR: { 4112 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 4113 4114 /* 4115 * Sanity checks - minimum size, size a multiple of 4116 * eight bytes, and matching size passed in. 4117 */ 4118 if (inlen != 0 && 4119 inlen != (8 * (rt->ip6r_len + 1))) 4120 return (EINVAL); 4121 4122 if (checkonly) 4123 break; 4124 4125 if (inlen == 0) { 4126 if (sticky && 4127 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 4128 kmem_free(ipp->ipp_rthdr, 4129 ipp->ipp_rthdrlen); 4130 ipp->ipp_rthdr = NULL; 4131 ipp->ipp_rthdrlen = 0; 4132 } 4133 ipp->ipp_fields &= ~IPPF_RTHDR; 4134 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 4135 } else { 4136 error = optcom_pkt_set(invalp, inlen, sticky, 4137 (uchar_t **)&ipp->ipp_rthdr, 4138 &ipp->ipp_rthdrlen, 0); 4139 if (error != 0) 4140 return (error); 4141 ipp->ipp_fields |= IPPF_RTHDR; 4142 } 4143 if (sticky) { 4144 error = udp_build_hdrs(q, udp); 4145 if (error != 0) 4146 return (error); 4147 } 4148 break; 4149 } 4150 4151 case IPV6_DONTFRAG: 4152 if (checkonly) 4153 break; 4154 4155 if (onoff) { 4156 ipp->ipp_fields |= IPPF_DONTFRAG; 4157 } else { 4158 ipp->ipp_fields &= ~IPPF_DONTFRAG; 4159 } 4160 break; 4161 4162 case IPV6_USE_MIN_MTU: 4163 if (inlen != sizeof (int)) 4164 
return (EINVAL); 4165 4166 if (*i1 < -1 || *i1 > 1) 4167 return (EINVAL); 4168 4169 if (checkonly) 4170 break; 4171 4172 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 4173 ipp->ipp_use_min_mtu = *i1; 4174 break; 4175 4176 case IPV6_BOUND_PIF: 4177 case IPV6_SEC_OPT: 4178 case IPV6_DONTFAILOVER_IF: 4179 case IPV6_SRC_PREFERENCES: 4180 case IPV6_V6ONLY: 4181 /* Handled at the IP level */ 4182 return (-EINVAL); 4183 default: 4184 *outlenp = 0; 4185 return (EINVAL); 4186 } 4187 break; 4188 } /* end IPPROTO_IPV6 */ 4189 case IPPROTO_UDP: 4190 switch (name) { 4191 case UDP_ANONPRIVBIND: 4192 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 4193 *outlenp = 0; 4194 return (error); 4195 } 4196 if (!checkonly) { 4197 udp->udp_anon_priv_bind = onoff; 4198 } 4199 break; 4200 case UDP_EXCLBIND: 4201 if (!checkonly) 4202 udp->udp_exclbind = onoff; 4203 break; 4204 case UDP_RCVHDR: 4205 if (!checkonly) 4206 udp->udp_rcvhdr = onoff; 4207 break; 4208 default: 4209 *outlenp = 0; 4210 return (EINVAL); 4211 } 4212 break; 4213 default: 4214 *outlenp = 0; 4215 return (EINVAL); 4216 } 4217 /* 4218 * Common case of OK return with outval same as inval. 4219 */ 4220 if (invalp != outvalp) { 4221 /* don't trust bcopy for identical src/dst */ 4222 (void) bcopy(invalp, outvalp, inlen); 4223 } 4224 *outlenp = inlen; 4225 return (0); 4226 } 4227 4228 /* 4229 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 4230 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 4231 * headers, and the udp header. 4232 * Returns failure if can't allocate memory. 
4233 */ 4234 static int 4235 udp_build_hdrs(queue_t *q, udp_t *udp) 4236 { 4237 udp_stack_t *us = udp->udp_us; 4238 uchar_t *hdrs; 4239 uint_t hdrs_len; 4240 ip6_t *ip6h; 4241 ip6i_t *ip6i; 4242 udpha_t *udpha; 4243 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 4244 4245 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 4246 ASSERT(hdrs_len != 0); 4247 if (hdrs_len != udp->udp_sticky_hdrs_len) { 4248 /* Need to reallocate */ 4249 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 4250 if (hdrs == NULL) 4251 return (ENOMEM); 4252 4253 if (udp->udp_sticky_hdrs_len != 0) { 4254 kmem_free(udp->udp_sticky_hdrs, 4255 udp->udp_sticky_hdrs_len); 4256 } 4257 udp->udp_sticky_hdrs = hdrs; 4258 udp->udp_sticky_hdrs_len = hdrs_len; 4259 } 4260 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 4261 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 4262 4263 /* Set header fields not in ipp */ 4264 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 4265 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 4266 ip6h = (ip6_t *)&ip6i[1]; 4267 } else { 4268 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 4269 } 4270 4271 if (!(ipp->ipp_fields & IPPF_ADDR)) 4272 ip6h->ip6_src = udp->udp_v6src; 4273 4274 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 4275 udpha->uha_src_port = udp->udp_port; 4276 4277 /* Try to get everything in a single mblk */ 4278 if (hdrs_len > udp->udp_max_hdr_len) { 4279 udp->udp_max_hdr_len = hdrs_len; 4280 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 4281 us->us_wroff_extra); 4282 } 4283 return (0); 4284 } 4285 4286 /* 4287 * This routine retrieves the value of an ND variable in a udpparam_t 4288 * structure. It is called through nd_getset when a user reads the 4289 * variable. 
4290 */ 4291 /* ARGSUSED */ 4292 static int 4293 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4294 { 4295 udpparam_t *udppa = (udpparam_t *)cp; 4296 4297 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4298 return (0); 4299 } 4300 4301 /* 4302 * Walk through the param array specified registering each element with the 4303 * named dispatch (ND) handler. 4304 */ 4305 static boolean_t 4306 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 4307 { 4308 for (; cnt-- > 0; udppa++) { 4309 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4310 if (!nd_load(ndp, udppa->udp_param_name, 4311 udp_param_get, udp_param_set, 4312 (caddr_t)udppa)) { 4313 nd_free(ndp); 4314 return (B_FALSE); 4315 } 4316 } 4317 } 4318 if (!nd_load(ndp, "udp_extra_priv_ports", 4319 udp_extra_priv_ports_get, NULL, NULL)) { 4320 nd_free(ndp); 4321 return (B_FALSE); 4322 } 4323 if (!nd_load(ndp, "udp_extra_priv_ports_add", 4324 NULL, udp_extra_priv_ports_add, NULL)) { 4325 nd_free(ndp); 4326 return (B_FALSE); 4327 } 4328 if (!nd_load(ndp, "udp_extra_priv_ports_del", 4329 NULL, udp_extra_priv_ports_del, NULL)) { 4330 nd_free(ndp); 4331 return (B_FALSE); 4332 } 4333 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 4334 NULL)) { 4335 nd_free(ndp); 4336 return (B_FALSE); 4337 } 4338 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 4339 NULL)) { 4340 nd_free(ndp); 4341 return (B_FALSE); 4342 } 4343 return (B_TRUE); 4344 } 4345 4346 /* This routine sets an ND variable in a udpparam_t structure. */ 4347 /* ARGSUSED */ 4348 static int 4349 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 4350 { 4351 long new_value; 4352 udpparam_t *udppa = (udpparam_t *)cp; 4353 4354 /* 4355 * Fail the request if the new value does not lie within the 4356 * required bounds. 
4357 */ 4358 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 4359 new_value < udppa->udp_param_min || 4360 new_value > udppa->udp_param_max) { 4361 return (EINVAL); 4362 } 4363 4364 /* Set the new value */ 4365 udppa->udp_param_value = new_value; 4366 return (0); 4367 } 4368 4369 /* 4370 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 4371 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 4372 * just count the length needed for allocation. If 'dbuf' is non-NULL, 4373 * then it's assumed to be allocated to be large enough. 4374 * 4375 * Returns zero if trimming of the security option causes all options to go 4376 * away. 4377 */ 4378 static size_t 4379 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 4380 { 4381 struct T_opthdr *toh; 4382 size_t hol = ipp->ipp_hopoptslen; 4383 ip6_hbh_t *dstopt = NULL; 4384 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 4385 size_t tlen, olen, plen; 4386 boolean_t deleting; 4387 const struct ip6_opt *sopt, *lastpad; 4388 struct ip6_opt *dopt; 4389 4390 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 4391 toh->level = IPPROTO_IPV6; 4392 toh->name = IPV6_HOPOPTS; 4393 toh->status = 0; 4394 dstopt = (ip6_hbh_t *)(toh + 1); 4395 } 4396 4397 /* 4398 * If labeling is enabled, then skip the label option 4399 * but get other options if there are any. 4400 */ 4401 if (is_system_labeled()) { 4402 dopt = NULL; 4403 if (dstopt != NULL) { 4404 /* will fill in ip6h_len later */ 4405 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 4406 dopt = (struct ip6_opt *)(dstopt + 1); 4407 } 4408 sopt = (const struct ip6_opt *)(srcopt + 1); 4409 hol -= sizeof (*srcopt); 4410 tlen = sizeof (*dstopt); 4411 lastpad = NULL; 4412 deleting = B_FALSE; 4413 /* 4414 * This loop finds the first (lastpad pointer) of any number of 4415 * pads that preceeds the security option, then treats the 4416 * security option as though it were a pad, and then finds the 4417 * next non-pad option (or end of list). 
4418 * 4419 * It then treats the entire block as one big pad. To preserve 4420 * alignment of any options that follow, or just the end of the 4421 * list, it computes a minimal new padding size that keeps the 4422 * same alignment for the next option. 4423 * 4424 * If it encounters just a sequence of pads with no security 4425 * option, those are copied as-is rather than collapsed. 4426 * 4427 * Note that to handle the end of list case, the code makes one 4428 * loop with 'hol' set to zero. 4429 */ 4430 for (;;) { 4431 if (hol > 0) { 4432 if (sopt->ip6o_type == IP6OPT_PAD1) { 4433 if (lastpad == NULL) 4434 lastpad = sopt; 4435 sopt = (const struct ip6_opt *) 4436 &sopt->ip6o_len; 4437 hol--; 4438 continue; 4439 } 4440 olen = sopt->ip6o_len + sizeof (*sopt); 4441 if (olen > hol) 4442 olen = hol; 4443 if (sopt->ip6o_type == IP6OPT_PADN || 4444 sopt->ip6o_type == ip6opt_ls) { 4445 if (sopt->ip6o_type == ip6opt_ls) 4446 deleting = B_TRUE; 4447 if (lastpad == NULL) 4448 lastpad = sopt; 4449 sopt = (const struct ip6_opt *) 4450 ((const char *)sopt + olen); 4451 hol -= olen; 4452 continue; 4453 } 4454 } else { 4455 /* if nothing was copied at all, then delete */ 4456 if (tlen == sizeof (*dstopt)) 4457 return (0); 4458 /* last pass; pick up any trailing padding */ 4459 olen = 0; 4460 } 4461 if (deleting) { 4462 /* 4463 * compute aligning effect of deleted material 4464 * to reproduce with pad. 
4465 */ 4466 plen = ((const char *)sopt - 4467 (const char *)lastpad) & 7; 4468 tlen += plen; 4469 if (dopt != NULL) { 4470 if (plen == 1) { 4471 dopt->ip6o_type = IP6OPT_PAD1; 4472 } else if (plen > 1) { 4473 plen -= sizeof (*dopt); 4474 dopt->ip6o_type = IP6OPT_PADN; 4475 dopt->ip6o_len = plen; 4476 if (plen > 0) 4477 bzero(dopt + 1, plen); 4478 } 4479 dopt = (struct ip6_opt *) 4480 ((char *)dopt + plen); 4481 } 4482 deleting = B_FALSE; 4483 lastpad = NULL; 4484 } 4485 /* if there's uncopied padding, then copy that now */ 4486 if (lastpad != NULL) { 4487 olen += (const char *)sopt - 4488 (const char *)lastpad; 4489 sopt = lastpad; 4490 lastpad = NULL; 4491 } 4492 if (dopt != NULL && olen > 0) { 4493 bcopy(sopt, dopt, olen); 4494 dopt = (struct ip6_opt *)((char *)dopt + olen); 4495 } 4496 if (hol == 0) 4497 break; 4498 tlen += olen; 4499 sopt = (const struct ip6_opt *) 4500 ((const char *)sopt + olen); 4501 hol -= olen; 4502 } 4503 /* go back and patch up the length value, rounded upward */ 4504 if (dstopt != NULL) 4505 dstopt->ip6h_len = (tlen - 1) >> 3; 4506 } else { 4507 tlen = hol; 4508 if (dstopt != NULL) 4509 bcopy(srcopt, dstopt, hol); 4510 } 4511 4512 tlen += sizeof (*toh); 4513 if (toh != NULL) 4514 toh->len = tlen; 4515 4516 return (tlen); 4517 } 4518 4519 static void 4520 udp_input(conn_t *connp, mblk_t *mp) 4521 { 4522 struct T_unitdata_ind *tudi; 4523 uchar_t *rptr; /* Pointer to IP header */ 4524 int hdr_length; /* Length of IP+UDP headers */ 4525 int udi_size; /* Size of T_unitdata_ind */ 4526 int mp_len; 4527 udp_t *udp; 4528 udpha_t *udpha; 4529 int ipversion; 4530 ip6_pkt_t ipp; 4531 ip6_t *ip6h; 4532 ip6i_t *ip6i; 4533 mblk_t *mp1; 4534 mblk_t *options_mp = NULL; 4535 ip_pktinfo_t *pinfo = NULL; 4536 cred_t *cr = NULL; 4537 queue_t *q = connp->conn_rq; 4538 pid_t cpid; 4539 cred_t *rcr = connp->conn_cred; 4540 udp_stack_t *us; 4541 4542 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4543 "udp_rput_start: q %p mp %p", q, mp); 4544 4545 udp = 
connp->conn_udp; 4546 us = udp->udp_us; 4547 rptr = mp->b_rptr; 4548 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4549 ASSERT(OK_32PTR(rptr)); 4550 4551 /* 4552 * IP should have prepended the options data in an M_CTL 4553 * Check M_CTL "type" to make sure are not here bcos of 4554 * a valid ICMP message 4555 */ 4556 if (DB_TYPE(mp) == M_CTL) { 4557 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 4558 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 4559 IN_PKTINFO) { 4560 /* 4561 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 4562 * has been appended to the packet by IP. We need to 4563 * extract the mblk and adjust the rptr 4564 */ 4565 pinfo = (ip_pktinfo_t *)mp->b_rptr; 4566 options_mp = mp; 4567 mp = mp->b_cont; 4568 rptr = mp->b_rptr; 4569 UDP_STAT(us, udp_in_pktinfo); 4570 } else { 4571 /* 4572 * ICMP messages. 4573 */ 4574 udp_icmp_error(q, mp); 4575 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4576 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4577 return; 4578 } 4579 } 4580 4581 mp_len = msgdsize(mp); 4582 /* 4583 * This is the inbound data path. 4584 * First, we check to make sure the IP version number is correct, 4585 * and then pull the IP and UDP headers into the first mblk. 4586 * Assume IP provides aligned packets - otherwise toss. 4587 * Also, check if we have a complete IP header. 4588 */ 4589 4590 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4591 ipp.ipp_fields = 0; 4592 4593 ipversion = IPH_HDR_VERSION(rptr); 4594 switch (ipversion) { 4595 case IPV4_VERSION: 4596 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4597 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4598 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4599 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4600 (udp->udp_ip_rcv_options_len)) { 4601 /* 4602 * Handle IPv4 packets with options outside of the 4603 * main data path. Not needed for AF_INET6 sockets 4604 * since they don't support a getsockopt of IP_OPTIONS. 
4605 */ 4606 if (udp->udp_family == AF_INET6) 4607 break; 4608 /* 4609 * UDP length check performed for IPv4 packets with 4610 * options to check whether UDP length specified in 4611 * the header is the same as the physical length of 4612 * the packet. 4613 */ 4614 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4615 if (mp_len != (ntohs(udpha->uha_length) + 4616 hdr_length - UDPH_SIZE)) { 4617 goto tossit; 4618 } 4619 /* 4620 * Handle the case where the packet has IP options 4621 * and the IP_RECVSLLA & IP_RECVIF are set 4622 */ 4623 if (pinfo != NULL) 4624 mp = options_mp; 4625 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4626 SQTAG_UDP_INPUT); 4627 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4628 "udp_rput_end: q %p (%S)", q, "end"); 4629 return; 4630 } 4631 4632 /* Handle IPV6_RECVHOPLIMIT. */ 4633 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4634 udp->udp_ip_recvpktinfo) { 4635 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 4636 ipp.ipp_fields |= IPPF_IFINDEX; 4637 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 4638 } 4639 } 4640 break; 4641 case IPV6_VERSION: 4642 /* 4643 * IPv6 packets can only be received by applications 4644 * that are prepared to receive IPv6 addresses. 4645 * The IP fanout must ensure this. 
4646 */ 4647 ASSERT(udp->udp_family == AF_INET6); 4648 4649 ip6h = (ip6_t *)rptr; 4650 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4651 4652 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4653 uint8_t nexthdrp; 4654 /* Look for ifindex information */ 4655 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4656 ip6i = (ip6i_t *)ip6h; 4657 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4658 goto tossit; 4659 4660 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4661 ASSERT(ip6i->ip6i_ifindex != 0); 4662 ipp.ipp_fields |= IPPF_IFINDEX; 4663 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4664 } 4665 rptr = (uchar_t *)&ip6i[1]; 4666 mp->b_rptr = rptr; 4667 if (rptr == mp->b_wptr) { 4668 mp1 = mp->b_cont; 4669 freeb(mp); 4670 mp = mp1; 4671 rptr = mp->b_rptr; 4672 } 4673 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4674 goto tossit; 4675 ip6h = (ip6_t *)rptr; 4676 mp_len = msgdsize(mp); 4677 } 4678 /* 4679 * Find any potentially interesting extension headers 4680 * as well as the length of the IPv6 + extension 4681 * headers. 4682 */ 4683 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4684 UDPH_SIZE; 4685 ASSERT(nexthdrp == IPPROTO_UDP); 4686 } else { 4687 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4688 ip6i = NULL; 4689 } 4690 break; 4691 default: 4692 ASSERT(0); 4693 } 4694 4695 /* 4696 * IP inspected the UDP header thus all of it must be in the mblk. 4697 * UDP length check is performed for IPv6 packets and IPv4 packets 4698 * without options to check if the size of the packet as specified 4699 * by the header is the same as the physical size of the packet. 4700 */ 4701 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4702 if ((MBLKL(mp) < hdr_length) || 4703 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4704 goto tossit; 4705 } 4706 4707 /* Walk past the headers. */ 4708 if (!udp->udp_rcvhdr) { 4709 mp->b_rptr = rptr + hdr_length; 4710 mp_len -= hdr_length; 4711 } 4712 4713 /* 4714 * This is the inbound data path. 
Packets are passed upstream as 4715 * T_UNITDATA_IND messages with full IP headers still attached. 4716 */ 4717 if (udp->udp_family == AF_INET) { 4718 sin_t *sin; 4719 4720 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4721 4722 /* 4723 * Normally only send up the address. 4724 * If IP_RECVDSTADDR is set we include the destination IP 4725 * address as an option. With IP_RECVOPTS we include all 4726 * the IP options. Only ip_rput_other() handles packets 4727 * that contain IP options. 4728 */ 4729 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4730 if (udp->udp_recvdstaddr) { 4731 udi_size += sizeof (struct T_opthdr) + 4732 sizeof (struct in_addr); 4733 UDP_STAT(us, udp_in_recvdstaddr); 4734 } 4735 4736 if (udp->udp_ip_recvpktinfo && (pinfo != NULL) && 4737 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4738 udi_size += sizeof (struct T_opthdr) + 4739 sizeof (struct in_pktinfo); 4740 UDP_STAT(us, udp_ip_recvpktinfo); 4741 } 4742 4743 /* 4744 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4745 * space accordingly 4746 */ 4747 if (udp->udp_recvif && (pinfo != NULL) && 4748 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4749 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4750 UDP_STAT(us, udp_in_recvif); 4751 } 4752 4753 if (udp->udp_recvslla && (pinfo != NULL) && 4754 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4755 udi_size += sizeof (struct T_opthdr) + 4756 sizeof (struct sockaddr_dl); 4757 UDP_STAT(us, udp_in_recvslla); 4758 } 4759 4760 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4761 udi_size += sizeof (struct T_opthdr) + ucredsize; 4762 cpid = DB_CPID(mp); 4763 UDP_STAT(us, udp_in_recvucred); 4764 } 4765 4766 /* 4767 * If SO_TIMESTAMP is set allocate the appropriate sized 4768 * buffer. Since gethrestime() expects a pointer aligned 4769 * argument, we allocate space necessary for extra 4770 * alignment (even though it might not be used). 
4771 */ 4772 if (udp->udp_timestamp) { 4773 udi_size += sizeof (struct T_opthdr) + 4774 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4775 UDP_STAT(us, udp_in_timestamp); 4776 } 4777 4778 /* 4779 * If IP_RECVTTL is set allocate the appropriate sized buffer 4780 */ 4781 if (udp->udp_recvttl) { 4782 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4783 UDP_STAT(us, udp_in_recvttl); 4784 } 4785 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4786 4787 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4788 mp1 = allocb(udi_size, BPRI_MED); 4789 if (mp1 == NULL) { 4790 freemsg(mp); 4791 if (options_mp != NULL) 4792 freeb(options_mp); 4793 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4794 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4795 BUMP_MIB(&udp->udp_mib, udpInErrors); 4796 return; 4797 } 4798 mp1->b_cont = mp; 4799 mp = mp1; 4800 mp->b_datap->db_type = M_PROTO; 4801 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4802 mp->b_wptr = (uchar_t *)tudi + udi_size; 4803 tudi->PRIM_type = T_UNITDATA_IND; 4804 tudi->SRC_length = sizeof (sin_t); 4805 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4806 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4807 sizeof (sin_t); 4808 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4809 tudi->OPT_length = udi_size; 4810 sin = (sin_t *)&tudi[1]; 4811 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4812 sin->sin_port = udpha->uha_src_port; 4813 sin->sin_family = udp->udp_family; 4814 *(uint32_t *)&sin->sin_zero[0] = 0; 4815 *(uint32_t *)&sin->sin_zero[4] = 0; 4816 4817 /* 4818 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4819 * IP_RECVTTL has been set. 4820 */ 4821 if (udi_size != 0) { 4822 /* 4823 * Copy in destination address before options to avoid 4824 * any padding issues. 
4825 */ 4826 char *dstopt; 4827 4828 dstopt = (char *)&sin[1]; 4829 if (udp->udp_recvdstaddr) { 4830 struct T_opthdr *toh; 4831 ipaddr_t *dstptr; 4832 4833 toh = (struct T_opthdr *)dstopt; 4834 toh->level = IPPROTO_IP; 4835 toh->name = IP_RECVDSTADDR; 4836 toh->len = sizeof (struct T_opthdr) + 4837 sizeof (ipaddr_t); 4838 toh->status = 0; 4839 dstopt += sizeof (struct T_opthdr); 4840 dstptr = (ipaddr_t *)dstopt; 4841 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4842 dstopt = (char *)toh + toh->len; 4843 udi_size -= toh->len; 4844 } 4845 4846 if (udp->udp_ip_recvpktinfo && (pinfo != NULL) && 4847 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4848 struct T_opthdr *toh; 4849 struct in_pktinfo *pktinfop; 4850 4851 toh = (struct T_opthdr *)dstopt; 4852 toh->level = IPPROTO_IP; 4853 toh->name = IP_PKTINFO; 4854 toh->len = sizeof (struct T_opthdr) + 4855 sizeof (*pktinfop); 4856 toh->status = 0; 4857 dstopt += sizeof (struct T_opthdr); 4858 pktinfop = (struct in_pktinfo *)dstopt; 4859 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4860 pktinfop->ipi_spec_dst = 4861 pinfo->ip_pkt_match_addr; 4862 pktinfop->ipi_addr.s_addr = 4863 ((ipha_t *)rptr)->ipha_dst; 4864 4865 dstopt += sizeof (struct in_pktinfo); 4866 udi_size -= toh->len; 4867 } 4868 4869 if (udp->udp_recvslla && (pinfo != NULL) && 4870 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4871 4872 struct T_opthdr *toh; 4873 struct sockaddr_dl *dstptr; 4874 4875 toh = (struct T_opthdr *)dstopt; 4876 toh->level = IPPROTO_IP; 4877 toh->name = IP_RECVSLLA; 4878 toh->len = sizeof (struct T_opthdr) + 4879 sizeof (struct sockaddr_dl); 4880 toh->status = 0; 4881 dstopt += sizeof (struct T_opthdr); 4882 dstptr = (struct sockaddr_dl *)dstopt; 4883 bcopy(&pinfo->ip_pkt_slla, dstptr, 4884 sizeof (struct sockaddr_dl)); 4885 dstopt = (char *)toh + toh->len; 4886 udi_size -= toh->len; 4887 } 4888 4889 if (udp->udp_recvif && (pinfo != NULL) && 4890 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4891 4892 struct T_opthdr *toh; 4893 uint_t *dstptr; 4894 4895 toh = 
(struct T_opthdr *)dstopt; 4896 toh->level = IPPROTO_IP; 4897 toh->name = IP_RECVIF; 4898 toh->len = sizeof (struct T_opthdr) + 4899 sizeof (uint_t); 4900 toh->status = 0; 4901 dstopt += sizeof (struct T_opthdr); 4902 dstptr = (uint_t *)dstopt; 4903 *dstptr = pinfo->ip_pkt_ifindex; 4904 dstopt = (char *)toh + toh->len; 4905 udi_size -= toh->len; 4906 } 4907 4908 if (cr != NULL) { 4909 struct T_opthdr *toh; 4910 4911 toh = (struct T_opthdr *)dstopt; 4912 toh->level = SOL_SOCKET; 4913 toh->name = SCM_UCRED; 4914 toh->len = sizeof (struct T_opthdr) + ucredsize; 4915 toh->status = 0; 4916 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4917 dstopt = (char *)toh + toh->len; 4918 udi_size -= toh->len; 4919 } 4920 4921 if (udp->udp_timestamp) { 4922 struct T_opthdr *toh; 4923 4924 toh = (struct T_opthdr *)dstopt; 4925 toh->level = SOL_SOCKET; 4926 toh->name = SCM_TIMESTAMP; 4927 toh->len = sizeof (struct T_opthdr) + 4928 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4929 toh->status = 0; 4930 dstopt += sizeof (struct T_opthdr); 4931 /* Align for gethrestime() */ 4932 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4933 sizeof (intptr_t)); 4934 gethrestime((timestruc_t *)dstopt); 4935 dstopt = (char *)toh + toh->len; 4936 udi_size -= toh->len; 4937 } 4938 4939 /* 4940 * CAUTION: 4941 * Due to aligment issues 4942 * Processing of IP_RECVTTL option 4943 * should always be the last. Adding 4944 * any option processing after this will 4945 * cause alignment panic. 
4946 */ 4947 if (udp->udp_recvttl) { 4948 struct T_opthdr *toh; 4949 uint8_t *dstptr; 4950 4951 toh = (struct T_opthdr *)dstopt; 4952 toh->level = IPPROTO_IP; 4953 toh->name = IP_RECVTTL; 4954 toh->len = sizeof (struct T_opthdr) + 4955 sizeof (uint8_t); 4956 toh->status = 0; 4957 dstopt += sizeof (struct T_opthdr); 4958 dstptr = (uint8_t *)dstopt; 4959 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4960 dstopt = (char *)toh + toh->len; 4961 udi_size -= toh->len; 4962 } 4963 4964 /* Consumed all of allocated space */ 4965 ASSERT(udi_size == 0); 4966 } 4967 } else { 4968 sin6_t *sin6; 4969 4970 /* 4971 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4972 * 4973 * Normally we only send up the address. If receiving of any 4974 * optional receive side information is enabled, we also send 4975 * that up as options. 4976 * [ Only udp_rput_other() handles packets that contain IP 4977 * options so code to account for does not appear immediately 4978 * below but elsewhere ] 4979 */ 4980 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4981 4982 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4983 IPPF_RTHDR|IPPF_IFINDEX)) { 4984 if (udp->udp_ipv6_recvhopopts && 4985 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4986 size_t hlen; 4987 4988 UDP_STAT(us, udp_in_recvhopopts); 4989 hlen = copy_hop_opts(&ipp, NULL); 4990 if (hlen == 0) 4991 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4992 udi_size += hlen; 4993 } 4994 if ((udp->udp_ipv6_recvdstopts || 4995 udp->udp_old_ipv6_recvdstopts) && 4996 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4997 udi_size += sizeof (struct T_opthdr) + 4998 ipp.ipp_dstoptslen; 4999 UDP_STAT(us, udp_in_recvdstopts); 5000 } 5001 if (((udp->udp_ipv6_recvdstopts && 5002 udp->udp_ipv6_recvrthdr && 5003 (ipp.ipp_fields & IPPF_RTHDR)) || 5004 udp->udp_ipv6_recvrthdrdstopts) && 5005 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 5006 udi_size += sizeof (struct T_opthdr) + 5007 ipp.ipp_rtdstoptslen; 5008 UDP_STAT(us, udp_in_recvrtdstopts); 5009 } 5010 if 
(udp->udp_ipv6_recvrthdr && 5011 (ipp.ipp_fields & IPPF_RTHDR)) { 5012 udi_size += sizeof (struct T_opthdr) + 5013 ipp.ipp_rthdrlen; 5014 UDP_STAT(us, udp_in_recvrthdr); 5015 } 5016 if (udp->udp_ip_recvpktinfo && 5017 (ipp.ipp_fields & IPPF_IFINDEX)) { 5018 udi_size += sizeof (struct T_opthdr) + 5019 sizeof (struct in6_pktinfo); 5020 UDP_STAT(us, udp_in_recvpktinfo); 5021 } 5022 5023 } 5024 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5025 udi_size += sizeof (struct T_opthdr) + ucredsize; 5026 cpid = DB_CPID(mp); 5027 UDP_STAT(us, udp_in_recvucred); 5028 } 5029 5030 if (udp->udp_ipv6_recvhoplimit) { 5031 udi_size += sizeof (struct T_opthdr) + sizeof (int); 5032 UDP_STAT(us, udp_in_recvhoplimit); 5033 } 5034 5035 if (udp->udp_ipv6_recvtclass) { 5036 udi_size += sizeof (struct T_opthdr) + sizeof (int); 5037 UDP_STAT(us, udp_in_recvtclass); 5038 } 5039 5040 mp1 = allocb(udi_size, BPRI_MED); 5041 if (mp1 == NULL) { 5042 freemsg(mp); 5043 if (options_mp != NULL) 5044 freeb(options_mp); 5045 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5046 "udp_rput_end: q %p (%S)", q, "allocbfail"); 5047 BUMP_MIB(&udp->udp_mib, udpInErrors); 5048 return; 5049 } 5050 mp1->b_cont = mp; 5051 mp = mp1; 5052 mp->b_datap->db_type = M_PROTO; 5053 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5054 mp->b_wptr = (uchar_t *)tudi + udi_size; 5055 tudi->PRIM_type = T_UNITDATA_IND; 5056 tudi->SRC_length = sizeof (sin6_t); 5057 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5058 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 5059 sizeof (sin6_t); 5060 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 5061 tudi->OPT_length = udi_size; 5062 sin6 = (sin6_t *)&tudi[1]; 5063 if (ipversion == IPV4_VERSION) { 5064 in6_addr_t v6dst; 5065 5066 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 5067 &sin6->sin6_addr); 5068 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 5069 &v6dst); 5070 sin6->sin6_flowinfo = 0; 5071 sin6->sin6_scope_id = 0; 5072 sin6->__sin6_src_id = 
ip_srcid_find_addr(&v6dst, 5073 connp->conn_zoneid, us->us_netstack); 5074 } else { 5075 sin6->sin6_addr = ip6h->ip6_src; 5076 /* No sin6_flowinfo per API */ 5077 sin6->sin6_flowinfo = 0; 5078 /* For link-scope source pass up scope id */ 5079 if ((ipp.ipp_fields & IPPF_IFINDEX) && 5080 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 5081 sin6->sin6_scope_id = ipp.ipp_ifindex; 5082 else 5083 sin6->sin6_scope_id = 0; 5084 sin6->__sin6_src_id = ip_srcid_find_addr( 5085 &ip6h->ip6_dst, connp->conn_zoneid, 5086 us->us_netstack); 5087 } 5088 sin6->sin6_port = udpha->uha_src_port; 5089 sin6->sin6_family = udp->udp_family; 5090 5091 if (udi_size != 0) { 5092 uchar_t *dstopt; 5093 5094 dstopt = (uchar_t *)&sin6[1]; 5095 if (udp->udp_ip_recvpktinfo && 5096 (ipp.ipp_fields & IPPF_IFINDEX)) { 5097 struct T_opthdr *toh; 5098 struct in6_pktinfo *pkti; 5099 5100 toh = (struct T_opthdr *)dstopt; 5101 toh->level = IPPROTO_IPV6; 5102 toh->name = IPV6_PKTINFO; 5103 toh->len = sizeof (struct T_opthdr) + 5104 sizeof (*pkti); 5105 toh->status = 0; 5106 dstopt += sizeof (struct T_opthdr); 5107 pkti = (struct in6_pktinfo *)dstopt; 5108 if (ipversion == IPV6_VERSION) 5109 pkti->ipi6_addr = ip6h->ip6_dst; 5110 else 5111 IN6_IPADDR_TO_V4MAPPED( 5112 ((ipha_t *)rptr)->ipha_dst, 5113 &pkti->ipi6_addr); 5114 pkti->ipi6_ifindex = ipp.ipp_ifindex; 5115 dstopt += sizeof (*pkti); 5116 udi_size -= toh->len; 5117 } 5118 if (udp->udp_ipv6_recvhoplimit) { 5119 struct T_opthdr *toh; 5120 5121 toh = (struct T_opthdr *)dstopt; 5122 toh->level = IPPROTO_IPV6; 5123 toh->name = IPV6_HOPLIMIT; 5124 toh->len = sizeof (struct T_opthdr) + 5125 sizeof (uint_t); 5126 toh->status = 0; 5127 dstopt += sizeof (struct T_opthdr); 5128 if (ipversion == IPV6_VERSION) 5129 *(uint_t *)dstopt = ip6h->ip6_hops; 5130 else 5131 *(uint_t *)dstopt = 5132 ((ipha_t *)rptr)->ipha_ttl; 5133 dstopt += sizeof (uint_t); 5134 udi_size -= toh->len; 5135 } 5136 if (udp->udp_ipv6_recvtclass) { 5137 struct T_opthdr *toh; 5138 5139 toh = (struct 
T_opthdr *)dstopt; 5140 toh->level = IPPROTO_IPV6; 5141 toh->name = IPV6_TCLASS; 5142 toh->len = sizeof (struct T_opthdr) + 5143 sizeof (uint_t); 5144 toh->status = 0; 5145 dstopt += sizeof (struct T_opthdr); 5146 if (ipversion == IPV6_VERSION) { 5147 *(uint_t *)dstopt = 5148 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 5149 } else { 5150 ipha_t *ipha = (ipha_t *)rptr; 5151 *(uint_t *)dstopt = 5152 ipha->ipha_type_of_service; 5153 } 5154 dstopt += sizeof (uint_t); 5155 udi_size -= toh->len; 5156 } 5157 if (udp->udp_ipv6_recvhopopts && 5158 (ipp.ipp_fields & IPPF_HOPOPTS)) { 5159 size_t hlen; 5160 5161 hlen = copy_hop_opts(&ipp, dstopt); 5162 dstopt += hlen; 5163 udi_size -= hlen; 5164 } 5165 if (udp->udp_ipv6_recvdstopts && 5166 udp->udp_ipv6_recvrthdr && 5167 (ipp.ipp_fields & IPPF_RTHDR) && 5168 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 5169 struct T_opthdr *toh; 5170 5171 toh = (struct T_opthdr *)dstopt; 5172 toh->level = IPPROTO_IPV6; 5173 toh->name = IPV6_DSTOPTS; 5174 toh->len = sizeof (struct T_opthdr) + 5175 ipp.ipp_rtdstoptslen; 5176 toh->status = 0; 5177 dstopt += sizeof (struct T_opthdr); 5178 bcopy(ipp.ipp_rtdstopts, dstopt, 5179 ipp.ipp_rtdstoptslen); 5180 dstopt += ipp.ipp_rtdstoptslen; 5181 udi_size -= toh->len; 5182 } 5183 if (udp->udp_ipv6_recvrthdr && 5184 (ipp.ipp_fields & IPPF_RTHDR)) { 5185 struct T_opthdr *toh; 5186 5187 toh = (struct T_opthdr *)dstopt; 5188 toh->level = IPPROTO_IPV6; 5189 toh->name = IPV6_RTHDR; 5190 toh->len = sizeof (struct T_opthdr) + 5191 ipp.ipp_rthdrlen; 5192 toh->status = 0; 5193 dstopt += sizeof (struct T_opthdr); 5194 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 5195 dstopt += ipp.ipp_rthdrlen; 5196 udi_size -= toh->len; 5197 } 5198 if (udp->udp_ipv6_recvdstopts && 5199 (ipp.ipp_fields & IPPF_DSTOPTS)) { 5200 struct T_opthdr *toh; 5201 5202 toh = (struct T_opthdr *)dstopt; 5203 toh->level = IPPROTO_IPV6; 5204 toh->name = IPV6_DSTOPTS; 5205 toh->len = sizeof (struct T_opthdr) + 5206 ipp.ipp_dstoptslen; 5207 toh->status = 0; 5208 
dstopt += sizeof (struct T_opthdr); 5209 bcopy(ipp.ipp_dstopts, dstopt, 5210 ipp.ipp_dstoptslen); 5211 dstopt += ipp.ipp_dstoptslen; 5212 udi_size -= toh->len; 5213 } 5214 5215 if (cr != NULL) { 5216 struct T_opthdr *toh; 5217 5218 toh = (struct T_opthdr *)dstopt; 5219 toh->level = SOL_SOCKET; 5220 toh->name = SCM_UCRED; 5221 toh->len = sizeof (struct T_opthdr) + ucredsize; 5222 toh->status = 0; 5223 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5224 dstopt += toh->len; 5225 udi_size -= toh->len; 5226 } 5227 /* Consumed all of allocated space */ 5228 ASSERT(udi_size == 0); 5229 } 5230 #undef sin6 5231 /* No IP_RECVDSTADDR for IPv6. */ 5232 } 5233 5234 BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); 5235 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5236 "udp_rput_end: q %p (%S)", q, "end"); 5237 if (options_mp != NULL) 5238 freeb(options_mp); 5239 5240 if (udp->udp_direct_sockfs) { 5241 /* 5242 * There is nothing above us except for the stream head; 5243 * use the read-side synchronous stream interface in 5244 * order to reduce the time spent in interrupt thread. 5245 */ 5246 ASSERT(udp->udp_issocket); 5247 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 5248 } else { 5249 /* 5250 * Use regular STREAMS interface to pass data upstream 5251 * if this is not a socket endpoint, or if we have 5252 * switched over to the slow mode due to sockmod being 5253 * popped or a module being pushed on top of us. 
5254 */ 5255 putnext(UDP_RD(q), mp); 5256 } 5257 return; 5258 5259 tossit: 5260 freemsg(mp); 5261 if (options_mp != NULL) 5262 freeb(options_mp); 5263 BUMP_MIB(&udp->udp_mib, udpInErrors); 5264 } 5265 5266 void 5267 udp_conn_recv(conn_t *connp, mblk_t *mp) 5268 { 5269 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 5270 } 5271 5272 /* ARGSUSED */ 5273 static void 5274 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 5275 { 5276 udp_input((conn_t *)arg, mp); 5277 _UDP_EXIT((conn_t *)arg); 5278 } 5279 5280 /* 5281 * Process non-M_DATA messages as well as M_DATA messages that requires 5282 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 5283 */ 5284 static void 5285 udp_rput_other(queue_t *q, mblk_t *mp) 5286 { 5287 struct T_unitdata_ind *tudi; 5288 mblk_t *mp1; 5289 uchar_t *rptr; 5290 uchar_t *new_rptr; 5291 int hdr_length; 5292 int udi_size; /* Size of T_unitdata_ind */ 5293 int opt_len; /* Length of IP options */ 5294 sin_t *sin; 5295 struct T_error_ack *tea; 5296 mblk_t *options_mp = NULL; 5297 ip_pktinfo_t *pinfo; 5298 boolean_t recv_on = B_FALSE; 5299 cred_t *cr = NULL; 5300 udp_t *udp = Q_TO_UDP(q); 5301 pid_t cpid; 5302 cred_t *rcr = udp->udp_connp->conn_cred; 5303 udp_stack_t *us = udp->udp_us; 5304 5305 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 5306 "udp_rput_other: q %p mp %p", q, mp); 5307 5308 ASSERT(OK_32PTR(mp->b_rptr)); 5309 rptr = mp->b_rptr; 5310 5311 switch (mp->b_datap->db_type) { 5312 case M_CTL: 5313 /* 5314 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 5315 */ 5316 recv_on = B_TRUE; 5317 options_mp = mp; 5318 pinfo = (ip_pktinfo_t *)options_mp->b_rptr; 5319 5320 /* 5321 * The actual data is in mp->b_cont 5322 */ 5323 mp = mp->b_cont; 5324 ASSERT(OK_32PTR(mp->b_rptr)); 5325 rptr = mp->b_rptr; 5326 break; 5327 case M_DATA: 5328 /* 5329 * M_DATA messages contain IPv4 datagrams. They are handled 5330 * after this switch. 
5331 */ 5332 break; 5333 case M_PROTO: 5334 case M_PCPROTO: 5335 /* M_PROTO messages contain some type of TPI message. */ 5336 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 5337 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5338 freemsg(mp); 5339 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5340 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 5341 return; 5342 } 5343 tea = (struct T_error_ack *)rptr; 5344 5345 switch (tea->PRIM_type) { 5346 case T_ERROR_ACK: 5347 switch (tea->ERROR_prim) { 5348 case O_T_BIND_REQ: 5349 case T_BIND_REQ: { 5350 /* 5351 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5352 * clear out the associated port and source 5353 * address before passing the message 5354 * upstream. If this was caused by a T_CONN_REQ 5355 * revert back to bound state. 5356 */ 5357 udp_fanout_t *udpf; 5358 5359 udpf = &us->us_bind_fanout[ 5360 UDP_BIND_HASH(udp->udp_port, 5361 us->us_bind_fanout_size)]; 5362 mutex_enter(&udpf->uf_lock); 5363 if (udp->udp_state == TS_DATA_XFER) { 5364 /* Connect failed */ 5365 tea->ERROR_prim = T_CONN_REQ; 5366 /* Revert back to the bound source */ 5367 udp->udp_v6src = udp->udp_bound_v6src; 5368 udp->udp_state = TS_IDLE; 5369 mutex_exit(&udpf->uf_lock); 5370 if (udp->udp_family == AF_INET6) 5371 (void) udp_build_hdrs(q, udp); 5372 break; 5373 } 5374 5375 if (udp->udp_discon_pending) { 5376 tea->ERROR_prim = T_DISCON_REQ; 5377 udp->udp_discon_pending = 0; 5378 } 5379 V6_SET_ZERO(udp->udp_v6src); 5380 V6_SET_ZERO(udp->udp_bound_v6src); 5381 udp->udp_state = TS_UNBND; 5382 udp_bind_hash_remove(udp, B_TRUE); 5383 udp->udp_port = 0; 5384 mutex_exit(&udpf->uf_lock); 5385 if (udp->udp_family == AF_INET6) 5386 (void) udp_build_hdrs(q, udp); 5387 break; 5388 } 5389 default: 5390 break; 5391 } 5392 break; 5393 case T_BIND_ACK: 5394 udp_rput_bind_ack(q, mp); 5395 return; 5396 5397 case T_OPTMGMT_ACK: 5398 case T_OK_ACK: 5399 break; 5400 default: 5401 freemsg(mp); 5402 return; 5403 } 5404 putnext(UDP_RD(q), mp); 5405 return; 5406 } 5407 
5408 /* 5409 * This is the inbound data path. 5410 * First, we make sure the data contains both IP and UDP headers. 5411 * 5412 * This handle IPv4 packets for only AF_INET sockets. 5413 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 5414 * is no need saving the options. 5415 */ 5416 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 5417 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 5418 if (mp->b_wptr - rptr < hdr_length) { 5419 if (!pullupmsg(mp, hdr_length)) { 5420 freemsg(mp); 5421 if (options_mp != NULL) 5422 freeb(options_mp); 5423 BUMP_MIB(&udp->udp_mib, udpInErrors); 5424 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5425 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 5426 return; 5427 } 5428 rptr = mp->b_rptr; 5429 } 5430 /* Walk past the headers. */ 5431 new_rptr = rptr + hdr_length; 5432 if (!udp->udp_rcvhdr) 5433 mp->b_rptr = new_rptr; 5434 5435 /* Save the options if any */ 5436 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 5437 if (opt_len > 0) { 5438 if (opt_len > udp->udp_ip_rcv_options_len) { 5439 if (udp->udp_ip_rcv_options_len) 5440 mi_free((char *)udp->udp_ip_rcv_options); 5441 udp->udp_ip_rcv_options_len = 0; 5442 udp->udp_ip_rcv_options = 5443 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 5444 if (udp->udp_ip_rcv_options) 5445 udp->udp_ip_rcv_options_len = opt_len; 5446 } 5447 if (udp->udp_ip_rcv_options_len) { 5448 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 5449 udp->udp_ip_rcv_options, opt_len); 5450 /* Adjust length if we are resusing the space */ 5451 udp->udp_ip_rcv_options_len = opt_len; 5452 } 5453 } else if (udp->udp_ip_rcv_options_len) { 5454 mi_free((char *)udp->udp_ip_rcv_options); 5455 udp->udp_ip_rcv_options = NULL; 5456 udp->udp_ip_rcv_options_len = 0; 5457 } 5458 5459 /* 5460 * Normally only send up the address. 5461 * If IP_RECVDSTADDR is set we include the destination IP 5462 * address as an option. With IP_RECVOPTS we include all 5463 * the IP options. 
5464 */ 5465 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5466 if (udp->udp_recvdstaddr) { 5467 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5468 UDP_STAT(us, udp_in_recvdstaddr); 5469 } 5470 5471 if (udp->udp_ip_recvpktinfo && recv_on && 5472 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 5473 udi_size += sizeof (struct T_opthdr) + 5474 sizeof (struct in_pktinfo); 5475 UDP_STAT(us, udp_ip_recvpktinfo); 5476 } 5477 5478 if (udp->udp_recvopts && opt_len > 0) { 5479 udi_size += sizeof (struct T_opthdr) + opt_len; 5480 UDP_STAT(us, udp_in_recvopts); 5481 } 5482 5483 /* 5484 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5485 * space accordingly 5486 */ 5487 if (udp->udp_recvif && recv_on && 5488 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 5489 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5490 UDP_STAT(us, udp_in_recvif); 5491 } 5492 5493 if (udp->udp_recvslla && recv_on && 5494 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 5495 udi_size += sizeof (struct T_opthdr) + 5496 sizeof (struct sockaddr_dl); 5497 UDP_STAT(us, udp_in_recvslla); 5498 } 5499 5500 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5501 udi_size += sizeof (struct T_opthdr) + ucredsize; 5502 cpid = DB_CPID(mp); 5503 UDP_STAT(us, udp_in_recvucred); 5504 } 5505 /* 5506 * If IP_RECVTTL is set allocate the appropriate sized buffer 5507 */ 5508 if (udp->udp_recvttl) { 5509 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5510 UDP_STAT(us, udp_in_recvttl); 5511 } 5512 5513 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 5514 mp1 = allocb(udi_size, BPRI_MED); 5515 if (mp1 == NULL) { 5516 freemsg(mp); 5517 if (options_mp != NULL) 5518 freeb(options_mp); 5519 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5520 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5521 BUMP_MIB(&udp->udp_mib, udpInErrors); 5522 return; 5523 } 5524 mp1->b_cont = mp; 5525 mp = mp1; 5526 mp->b_datap->db_type = M_PROTO; 5527 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5528 mp->b_wptr = (uchar_t *)tudi + udi_size; 5529 tudi->PRIM_type = T_UNITDATA_IND; 5530 tudi->SRC_length = sizeof (sin_t); 5531 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5532 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5533 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5534 tudi->OPT_length = udi_size; 5535 5536 sin = (sin_t *)&tudi[1]; 5537 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5538 sin->sin_port = ((in_port_t *) 5539 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5540 sin->sin_family = AF_INET; 5541 *(uint32_t *)&sin->sin_zero[0] = 0; 5542 *(uint32_t *)&sin->sin_zero[4] = 0; 5543 5544 /* 5545 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5546 * IP_RECVTTL has been set. 5547 */ 5548 if (udi_size != 0) { 5549 /* 5550 * Copy in destination address before options to avoid any 5551 * padding issues. 
5552 */ 5553 char *dstopt; 5554 5555 dstopt = (char *)&sin[1]; 5556 if (udp->udp_recvdstaddr) { 5557 struct T_opthdr *toh; 5558 ipaddr_t *dstptr; 5559 5560 toh = (struct T_opthdr *)dstopt; 5561 toh->level = IPPROTO_IP; 5562 toh->name = IP_RECVDSTADDR; 5563 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5564 toh->status = 0; 5565 dstopt += sizeof (struct T_opthdr); 5566 dstptr = (ipaddr_t *)dstopt; 5567 *dstptr = (((ipaddr_t *)rptr)[4]); 5568 dstopt += sizeof (ipaddr_t); 5569 udi_size -= toh->len; 5570 } 5571 if (udp->udp_recvopts && udi_size != 0) { 5572 struct T_opthdr *toh; 5573 5574 toh = (struct T_opthdr *)dstopt; 5575 toh->level = IPPROTO_IP; 5576 toh->name = IP_RECVOPTS; 5577 toh->len = sizeof (struct T_opthdr) + opt_len; 5578 toh->status = 0; 5579 dstopt += sizeof (struct T_opthdr); 5580 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5581 dstopt += opt_len; 5582 udi_size -= toh->len; 5583 } 5584 if (udp->udp_ip_recvpktinfo && recv_on && 5585 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 5586 5587 struct T_opthdr *toh; 5588 struct in_pktinfo *pktinfop; 5589 5590 toh = (struct T_opthdr *)dstopt; 5591 toh->level = IPPROTO_IP; 5592 toh->name = IP_PKTINFO; 5593 toh->len = sizeof (struct T_opthdr) + 5594 sizeof (*pktinfop); 5595 toh->status = 0; 5596 dstopt += sizeof (struct T_opthdr); 5597 pktinfop = (struct in_pktinfo *)dstopt; 5598 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 5599 pktinfop->ipi_spec_dst = pinfo->ip_pkt_match_addr; 5600 5601 pktinfop->ipi_addr.s_addr = ((ipha_t *)rptr)->ipha_dst; 5602 5603 dstopt += sizeof (struct in_pktinfo); 5604 udi_size -= toh->len; 5605 } 5606 5607 if (udp->udp_recvslla && recv_on && 5608 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 5609 5610 struct T_opthdr *toh; 5611 struct sockaddr_dl *dstptr; 5612 5613 toh = (struct T_opthdr *)dstopt; 5614 toh->level = IPPROTO_IP; 5615 toh->name = IP_RECVSLLA; 5616 toh->len = sizeof (struct T_opthdr) + 5617 sizeof (struct sockaddr_dl); 5618 toh->status = 0; 5619 dstopt += sizeof 
(struct T_opthdr); 5620 dstptr = (struct sockaddr_dl *)dstopt; 5621 bcopy(&pinfo->ip_pkt_slla, dstptr, 5622 sizeof (struct sockaddr_dl)); 5623 dstopt += sizeof (struct sockaddr_dl); 5624 udi_size -= toh->len; 5625 } 5626 5627 if (udp->udp_recvif && recv_on && 5628 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 5629 5630 struct T_opthdr *toh; 5631 uint_t *dstptr; 5632 5633 toh = (struct T_opthdr *)dstopt; 5634 toh->level = IPPROTO_IP; 5635 toh->name = IP_RECVIF; 5636 toh->len = sizeof (struct T_opthdr) + 5637 sizeof (uint_t); 5638 toh->status = 0; 5639 dstopt += sizeof (struct T_opthdr); 5640 dstptr = (uint_t *)dstopt; 5641 *dstptr = pinfo->ip_pkt_ifindex; 5642 dstopt += sizeof (uint_t); 5643 udi_size -= toh->len; 5644 } 5645 5646 if (cr != NULL) { 5647 struct T_opthdr *toh; 5648 5649 toh = (struct T_opthdr *)dstopt; 5650 toh->level = SOL_SOCKET; 5651 toh->name = SCM_UCRED; 5652 toh->len = sizeof (struct T_opthdr) + ucredsize; 5653 toh->status = 0; 5654 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5655 dstopt += toh->len; 5656 udi_size -= toh->len; 5657 } 5658 5659 if (udp->udp_recvttl) { 5660 struct T_opthdr *toh; 5661 uint8_t *dstptr; 5662 5663 toh = (struct T_opthdr *)dstopt; 5664 toh->level = IPPROTO_IP; 5665 toh->name = IP_RECVTTL; 5666 toh->len = sizeof (struct T_opthdr) + 5667 sizeof (uint8_t); 5668 toh->status = 0; 5669 dstopt += sizeof (struct T_opthdr); 5670 dstptr = (uint8_t *)dstopt; 5671 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5672 dstopt += sizeof (uint8_t); 5673 udi_size -= toh->len; 5674 } 5675 5676 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5677 } 5678 BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); 5679 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5680 "udp_rput_other_end: q %p (%S)", q, "end"); 5681 if (options_mp != NULL) 5682 freeb(options_mp); 5683 5684 if (udp->udp_direct_sockfs) { 5685 /* 5686 * There is nothing above us except for the stream head; 5687 * use the read-side synchronous stream interface in 5688 * order to reduce the time spent in 
interrupt thread. 5689 */ 5690 ASSERT(udp->udp_issocket); 5691 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5692 } else { 5693 /* 5694 * Use regular STREAMS interface to pass data upstream 5695 * if this is not a socket endpoint, or if we have 5696 * switched over to the slow mode due to sockmod being 5697 * popped or a module being pushed on top of us. 5698 */ 5699 putnext(UDP_RD(q), mp); 5700 } 5701 } 5702 5703 /* ARGSUSED */ 5704 static void 5705 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5706 { 5707 conn_t *connp = arg; 5708 5709 udp_rput_other(connp->conn_rq, mp); 5710 udp_exit(connp); 5711 } 5712 5713 /* 5714 * Process a T_BIND_ACK 5715 */ 5716 static void 5717 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5718 { 5719 udp_t *udp = Q_TO_UDP(q); 5720 mblk_t *mp1; 5721 ire_t *ire; 5722 struct T_bind_ack *tba; 5723 uchar_t *addrp; 5724 ipa_conn_t *ac; 5725 ipa6_conn_t *ac6; 5726 5727 if (udp->udp_discon_pending) 5728 udp->udp_discon_pending = 0; 5729 5730 /* 5731 * If a broadcast/multicast address was bound set 5732 * the source address to 0. 5733 * This ensures no datagrams with broadcast address 5734 * as source address are emitted (which would violate 5735 * RFC1122 - Hosts requirements) 5736 * 5737 * Note that when connecting the returned IRE is 5738 * for the destination address and we only perform 5739 * the broadcast check for the source address (it 5740 * is OK to connect to a broadcast/multicast address.) 5741 */ 5742 mp1 = mp->b_cont; 5743 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5744 ire = (ire_t *)mp1->b_rptr; 5745 5746 /* 5747 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5748 * local address. 
5749 */ 5750 if (ire->ire_type == IRE_BROADCAST && 5751 udp->udp_state != TS_DATA_XFER) { 5752 /* This was just a local bind to a broadcast addr */ 5753 V6_SET_ZERO(udp->udp_v6src); 5754 if (udp->udp_family == AF_INET6) 5755 (void) udp_build_hdrs(q, udp); 5756 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5757 /* 5758 * Local address not yet set - pick it from the 5759 * T_bind_ack 5760 */ 5761 tba = (struct T_bind_ack *)mp->b_rptr; 5762 addrp = &mp->b_rptr[tba->ADDR_offset]; 5763 switch (udp->udp_family) { 5764 case AF_INET: 5765 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5766 ac = (ipa_conn_t *)addrp; 5767 } else { 5768 ASSERT(tba->ADDR_length == 5769 sizeof (ipa_conn_x_t)); 5770 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5771 } 5772 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5773 &udp->udp_v6src); 5774 break; 5775 case AF_INET6: 5776 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5777 ac6 = (ipa6_conn_t *)addrp; 5778 } else { 5779 ASSERT(tba->ADDR_length == 5780 sizeof (ipa6_conn_x_t)); 5781 ac6 = &((ipa6_conn_x_t *) 5782 addrp)->ac6x_conn; 5783 } 5784 udp->udp_v6src = ac6->ac6_laddr; 5785 (void) udp_build_hdrs(q, udp); 5786 break; 5787 } 5788 } 5789 mp1 = mp1->b_cont; 5790 } 5791 /* 5792 * Look for one or more appended ACK message added by 5793 * udp_connect or udp_disconnect. 5794 * If none found just send up the T_BIND_ACK. 5795 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5796 * udp_disconnect has appended a T_OK_ACK. 
5797 */ 5798 if (mp1 != NULL) { 5799 if (mp->b_cont == mp1) 5800 mp->b_cont = NULL; 5801 else { 5802 ASSERT(mp->b_cont->b_cont == mp1); 5803 mp->b_cont->b_cont = NULL; 5804 } 5805 freemsg(mp); 5806 mp = mp1; 5807 while (mp != NULL) { 5808 mp1 = mp->b_cont; 5809 mp->b_cont = NULL; 5810 putnext(UDP_RD(q), mp); 5811 mp = mp1; 5812 } 5813 return; 5814 } 5815 freemsg(mp->b_cont); 5816 mp->b_cont = NULL; 5817 putnext(UDP_RD(q), mp); 5818 } 5819 5820 /* 5821 * return SNMP stuff in buffer in mpdata 5822 */ 5823 int 5824 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5825 { 5826 mblk_t *mpdata; 5827 mblk_t *mp_conn_ctl; 5828 mblk_t *mp_attr_ctl; 5829 mblk_t *mp6_conn_ctl; 5830 mblk_t *mp6_attr_ctl; 5831 mblk_t *mp_conn_tail; 5832 mblk_t *mp_attr_tail; 5833 mblk_t *mp6_conn_tail; 5834 mblk_t *mp6_attr_tail; 5835 struct opthdr *optp; 5836 mib2_udpEntry_t ude; 5837 mib2_udp6Entry_t ude6; 5838 mib2_transportMLPEntry_t mlp; 5839 int state; 5840 zoneid_t zoneid; 5841 int i; 5842 connf_t *connfp; 5843 conn_t *connp = Q_TO_CONN(q); 5844 udp_t *udp = connp->conn_udp; 5845 int v4_conn_idx; 5846 int v6_conn_idx; 5847 boolean_t needattr; 5848 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5849 5850 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 5851 if (mpctl == NULL || 5852 (mpdata = mpctl->b_cont) == NULL || 5853 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5854 (mp_attr_ctl = copymsg(mpctl)) == NULL || 5855 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 5856 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 5857 freemsg(mp_conn_ctl); 5858 freemsg(mp_attr_ctl); 5859 freemsg(mp6_conn_ctl); 5860 return (0); 5861 } 5862 5863 zoneid = connp->conn_zoneid; 5864 5865 /* fixed length structure for IPv4 and IPv6 counters */ 5866 SET_MIB(udp->udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5867 SET_MIB(udp->udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5868 /* synchronize 64- and 32-bit counters */ 5869 SYNC32_MIB(&udp->udp_mib, udpInDatagrams, udpHCInDatagrams); 5870 SYNC32_MIB(&udp->udp_mib, 
udpOutDatagrams, udpHCOutDatagrams); 5871 5872 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5873 optp->level = MIB2_UDP; 5874 optp->name = 0; 5875 (void) snmp_append_data(mpdata, (char *)&udp->udp_mib, 5876 sizeof (udp->udp_mib)); 5877 optp->len = msgdsize(mpdata); 5878 qreply(q, mpctl); 5879 5880 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 5881 v4_conn_idx = v6_conn_idx = 0; 5882 5883 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5884 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5885 connp = NULL; 5886 5887 while ((connp = ipcl_get_next_conn(connfp, connp, 5888 IPCL_UDP))) { 5889 udp = connp->conn_udp; 5890 if (zoneid != connp->conn_zoneid) 5891 continue; 5892 5893 /* 5894 * Note that the port numbers are sent in 5895 * host byte order 5896 */ 5897 5898 if (udp->udp_state == TS_UNBND) 5899 state = MIB2_UDP_unbound; 5900 else if (udp->udp_state == TS_IDLE) 5901 state = MIB2_UDP_idle; 5902 else if (udp->udp_state == TS_DATA_XFER) 5903 state = MIB2_UDP_connected; 5904 else 5905 state = MIB2_UDP_unknown; 5906 5907 needattr = B_FALSE; 5908 bzero(&mlp, sizeof (mlp)); 5909 if (connp->conn_mlp_type != mlptSingle) { 5910 if (connp->conn_mlp_type == mlptShared || 5911 connp->conn_mlp_type == mlptBoth) 5912 mlp.tme_flags |= MIB2_TMEF_SHARED; 5913 if (connp->conn_mlp_type == mlptPrivate || 5914 connp->conn_mlp_type == mlptBoth) 5915 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 5916 needattr = B_TRUE; 5917 } 5918 5919 /* 5920 * Create an IPv4 table entry for IPv4 entries and also 5921 * any IPv6 entries which are bound to in6addr_any 5922 * (i.e. anything a IPv4 peer could connect/send to). 
5923 */ 5924 if (udp->udp_ipversion == IPV4_VERSION || 5925 (udp->udp_state <= TS_IDLE && 5926 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5927 ude.udpEntryInfo.ue_state = state; 5928 /* 5929 * If in6addr_any this will set it to 5930 * INADDR_ANY 5931 */ 5932 ude.udpLocalAddress = 5933 V4_PART_OF_V6(udp->udp_v6src); 5934 ude.udpLocalPort = ntohs(udp->udp_port); 5935 if (udp->udp_state == TS_DATA_XFER) { 5936 /* 5937 * Can potentially get here for 5938 * v6 socket if another process 5939 * (say, ping) has just done a 5940 * sendto(), changing the state 5941 * from the TS_IDLE above to 5942 * TS_DATA_XFER by the time we hit 5943 * this part of the code. 5944 */ 5945 ude.udpEntryInfo.ue_RemoteAddress = 5946 V4_PART_OF_V6(udp->udp_v6dst); 5947 ude.udpEntryInfo.ue_RemotePort = 5948 ntohs(udp->udp_dstport); 5949 } else { 5950 ude.udpEntryInfo.ue_RemoteAddress = 0; 5951 ude.udpEntryInfo.ue_RemotePort = 0; 5952 } 5953 5954 /* 5955 * We make the assumption that all udp_t 5956 * structs will be created within an address 5957 * region no larger than 32-bits. 5958 */ 5959 ude.udpInstance = (uint32_t)(uintptr_t)udp; 5960 ude.udpCreationProcess = 5961 (udp->udp_open_pid < 0) ? 
5962 MIB2_UNKNOWN_PROCESS : 5963 udp->udp_open_pid; 5964 ude.udpCreationTime = udp->udp_open_time; 5965 5966 (void) snmp_append_data2(mp_conn_ctl->b_cont, 5967 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5968 mlp.tme_connidx = v4_conn_idx++; 5969 if (needattr) 5970 (void) snmp_append_data2( 5971 mp_attr_ctl->b_cont, &mp_attr_tail, 5972 (char *)&mlp, sizeof (mlp)); 5973 } 5974 if (udp->udp_ipversion == IPV6_VERSION) { 5975 ude6.udp6EntryInfo.ue_state = state; 5976 ude6.udp6LocalAddress = udp->udp_v6src; 5977 ude6.udp6LocalPort = ntohs(udp->udp_port); 5978 ude6.udp6IfIndex = udp->udp_bound_if; 5979 if (udp->udp_state == TS_DATA_XFER) { 5980 ude6.udp6EntryInfo.ue_RemoteAddress = 5981 udp->udp_v6dst; 5982 ude6.udp6EntryInfo.ue_RemotePort = 5983 ntohs(udp->udp_dstport); 5984 } else { 5985 ude6.udp6EntryInfo.ue_RemoteAddress = 5986 sin6_null.sin6_addr; 5987 ude6.udp6EntryInfo.ue_RemotePort = 0; 5988 } 5989 /* 5990 * We make the assumption that all udp_t 5991 * structs will be created within an address 5992 * region no larger than 32-bits. 5993 */ 5994 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 5995 ude6.udp6CreationProcess = 5996 (udp->udp_open_pid < 0) ? 5997 MIB2_UNKNOWN_PROCESS : 5998 udp->udp_open_pid; 5999 ude6.udp6CreationTime = udp->udp_open_time; 6000 6001 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 6002 &mp6_conn_tail, (char *)&ude6, 6003 sizeof (ude6)); 6004 mlp.tme_connidx = v6_conn_idx++; 6005 if (needattr) 6006 (void) snmp_append_data2( 6007 mp6_attr_ctl->b_cont, 6008 &mp6_attr_tail, (char *)&mlp, 6009 sizeof (mlp)); 6010 } 6011 } 6012 } 6013 6014 /* IPv4 UDP endpoints */ 6015 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 6016 sizeof (struct T_optmgmt_ack)]; 6017 optp->level = MIB2_UDP; 6018 optp->name = MIB2_UDP_ENTRY; 6019 optp->len = msgdsize(mp_conn_ctl->b_cont); 6020 qreply(q, mp_conn_ctl); 6021 6022 /* table of MLP attributes... 
*/ 6023 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 6024 sizeof (struct T_optmgmt_ack)]; 6025 optp->level = MIB2_UDP; 6026 optp->name = EXPER_XPORT_MLP; 6027 optp->len = msgdsize(mp_attr_ctl->b_cont); 6028 if (optp->len == 0) 6029 freemsg(mp_attr_ctl); 6030 else 6031 qreply(q, mp_attr_ctl); 6032 6033 /* IPv6 UDP endpoints */ 6034 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 6035 sizeof (struct T_optmgmt_ack)]; 6036 optp->level = MIB2_UDP6; 6037 optp->name = MIB2_UDP6_ENTRY; 6038 optp->len = msgdsize(mp6_conn_ctl->b_cont); 6039 qreply(q, mp6_conn_ctl); 6040 6041 /* table of MLP attributes... */ 6042 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 6043 sizeof (struct T_optmgmt_ack)]; 6044 optp->level = MIB2_UDP6; 6045 optp->name = EXPER_XPORT_MLP; 6046 optp->len = msgdsize(mp6_attr_ctl->b_cont); 6047 if (optp->len == 0) 6048 freemsg(mp6_attr_ctl); 6049 else 6050 qreply(q, mp6_attr_ctl); 6051 6052 return (1); 6053 } 6054 6055 /* 6056 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 6057 * NOTE: Per MIB-II, UDP has no writable data. 6058 * TODO: If this ever actually tries to set anything, it needs to be 6059 * to do the appropriate locking. 
6060 */ 6061 /* ARGSUSED */ 6062 int 6063 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 6064 uchar_t *ptr, int len) 6065 { 6066 switch (level) { 6067 case MIB2_UDP: 6068 return (0); 6069 default: 6070 return (1); 6071 } 6072 } 6073 6074 static void 6075 udp_report_item(mblk_t *mp, udp_t *udp) 6076 { 6077 char *state; 6078 char addrbuf1[INET6_ADDRSTRLEN]; 6079 char addrbuf2[INET6_ADDRSTRLEN]; 6080 uint_t print_len, buf_len; 6081 6082 buf_len = mp->b_datap->db_lim - mp->b_wptr; 6083 ASSERT(buf_len >= 0); 6084 if (buf_len == 0) 6085 return; 6086 6087 if (udp->udp_state == TS_UNBND) 6088 state = "UNBOUND"; 6089 else if (udp->udp_state == TS_IDLE) 6090 state = "IDLE"; 6091 else if (udp->udp_state == TS_DATA_XFER) 6092 state = "CONNECTED"; 6093 else 6094 state = "UnkState"; 6095 print_len = snprintf((char *)mp->b_wptr, buf_len, 6096 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 6097 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 6098 inet_ntop(AF_INET6, &udp->udp_v6src, 6099 addrbuf1, sizeof (addrbuf1)), 6100 inet_ntop(AF_INET6, &udp->udp_v6dst, 6101 addrbuf2, sizeof (addrbuf2)), 6102 ntohs(udp->udp_dstport), state); 6103 if (print_len < buf_len) { 6104 mp->b_wptr += print_len; 6105 } else { 6106 mp->b_wptr += buf_len; 6107 } 6108 } 6109 6110 /* Report for ndd "udp_status" */ 6111 /* ARGSUSED */ 6112 static int 6113 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 6114 { 6115 zoneid_t zoneid; 6116 connf_t *connfp; 6117 conn_t *connp = Q_TO_CONN(q); 6118 udp_t *udp = connp->conn_udp; 6119 int i; 6120 udp_stack_t *us = udp->udp_us; 6121 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6122 6123 /* 6124 * Because of the ndd constraint, at most we can have 64K buffer 6125 * to put in all UDP info. So to be more efficient, just 6126 * allocate a 64K buffer here, assuming we need that large buffer. 6127 * This may be a problem as any user can read udp_status. 
Therefore 6128 * we limit the rate of doing this using us_ndd_get_info_interval. 6129 * This should be OK as normal users should not do this too often. 6130 */ 6131 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 6132 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 6133 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 6134 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 6135 return (0); 6136 } 6137 } 6138 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 6139 /* The following may work even if we cannot get a large buf. */ 6140 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 6141 return (0); 6142 } 6143 (void) mi_mpprintf(mp, 6144 "UDP " MI_COL_HDRPAD_STR 6145 /* 12345678[89ABCDEF] */ 6146 " zone lport src addr dest addr port state"); 6147 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 6148 6149 zoneid = connp->conn_zoneid; 6150 6151 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 6152 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 6153 connp = NULL; 6154 6155 while ((connp = ipcl_get_next_conn(connfp, connp, 6156 IPCL_UDP))) { 6157 udp = connp->conn_udp; 6158 if (zoneid != GLOBAL_ZONEID && 6159 zoneid != connp->conn_zoneid) 6160 continue; 6161 6162 udp_report_item(mp->b_cont, udp); 6163 } 6164 } 6165 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 6166 return (0); 6167 } 6168 6169 /* 6170 * This routine creates a T_UDERROR_IND message and passes it upstream. 6171 * The address and options are copied from the T_UNITDATA_REQ message 6172 * passed in mp. This message is freed. 
6173 */ 6174 static void 6175 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 6176 t_scalar_t err) 6177 { 6178 struct T_unitdata_req *tudr; 6179 mblk_t *mp1; 6180 uchar_t *optaddr; 6181 t_scalar_t optlen; 6182 6183 if (DB_TYPE(mp) == M_DATA) { 6184 ASSERT(destaddr != NULL && destlen != 0); 6185 optaddr = NULL; 6186 optlen = 0; 6187 } else { 6188 if ((mp->b_wptr < mp->b_rptr) || 6189 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 6190 goto done; 6191 } 6192 tudr = (struct T_unitdata_req *)mp->b_rptr; 6193 destaddr = mp->b_rptr + tudr->DEST_offset; 6194 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 6195 destaddr + tudr->DEST_length < mp->b_rptr || 6196 destaddr + tudr->DEST_length > mp->b_wptr) { 6197 goto done; 6198 } 6199 optaddr = mp->b_rptr + tudr->OPT_offset; 6200 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 6201 optaddr + tudr->OPT_length < mp->b_rptr || 6202 optaddr + tudr->OPT_length > mp->b_wptr) { 6203 goto done; 6204 } 6205 destlen = tudr->DEST_length; 6206 optlen = tudr->OPT_length; 6207 } 6208 6209 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 6210 (char *)optaddr, optlen, err); 6211 if (mp1 != NULL) 6212 putnext(UDP_RD(q), mp1); 6213 6214 done: 6215 freemsg(mp); 6216 } 6217 6218 /* 6219 * This routine removes a port number association from a stream. It 6220 * is called by udp_wput to handle T_UNBIND_REQ messages. 6221 */ 6222 static void 6223 udp_unbind(queue_t *q, mblk_t *mp) 6224 { 6225 udp_t *udp = Q_TO_UDP(q); 6226 6227 /* If a bind has not been done, we can't unbind. 
*/ 6228 if (udp->udp_state == TS_UNBND) { 6229 udp_err_ack(q, mp, TOUTSTATE, 0); 6230 return; 6231 } 6232 if (cl_inet_unbind != NULL) { 6233 /* 6234 * Running in cluster mode - register unbind information 6235 */ 6236 if (udp->udp_ipversion == IPV4_VERSION) { 6237 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 6238 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 6239 (in_port_t)udp->udp_port); 6240 } else { 6241 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 6242 (uint8_t *)&(udp->udp_v6src), 6243 (in_port_t)udp->udp_port); 6244 } 6245 } 6246 6247 udp_bind_hash_remove(udp, B_FALSE); 6248 V6_SET_ZERO(udp->udp_v6src); 6249 V6_SET_ZERO(udp->udp_bound_v6src); 6250 udp->udp_port = 0; 6251 udp->udp_state = TS_UNBND; 6252 6253 if (udp->udp_family == AF_INET6) { 6254 int error; 6255 6256 /* Rebuild the header template */ 6257 error = udp_build_hdrs(q, udp); 6258 if (error != 0) { 6259 udp_err_ack(q, mp, TSYSERR, error); 6260 return; 6261 } 6262 } 6263 /* 6264 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 6265 * and therefore ip_unbind must never return NULL. 6266 */ 6267 mp = ip_unbind(q, mp); 6268 ASSERT(mp != NULL); 6269 putnext(UDP_RD(q), mp); 6270 } 6271 6272 /* 6273 * Don't let port fall into the privileged range. 6274 * Since the extra privileged ports can be arbitrary we also 6275 * ensure that we exclude those from consideration. 6276 * us->us_epriv_ports is not sorted thus we loop over it until 6277 * there are no changes. 6278 */ 6279 static in_port_t 6280 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 6281 { 6282 int i; 6283 in_port_t nextport; 6284 boolean_t restart = B_FALSE; 6285 udp_stack_t *us = udp->udp_us; 6286 6287 if (random && udp_random_anon_port != 0) { 6288 (void) random_get_pseudo_bytes((uint8_t *)&port, 6289 sizeof (in_port_t)); 6290 /* 6291 * Unless changed by a sys admin, the smallest anon port 6292 * is 32768 and the largest anon port is 65535. 
It is 6293 * very likely (50%) for the random port to be smaller 6294 * than the smallest anon port. When that happens, 6295 * add port % (anon port range) to the smallest anon 6296 * port to get the random port. It should fall into the 6297 * valid anon port range. 6298 */ 6299 if (port < us->us_smallest_anon_port) { 6300 port = us->us_smallest_anon_port + 6301 port % (us->us_largest_anon_port - 6302 us->us_smallest_anon_port); 6303 } 6304 } 6305 6306 retry: 6307 if (port < us->us_smallest_anon_port) 6308 port = us->us_smallest_anon_port; 6309 6310 if (port > us->us_largest_anon_port) { 6311 port = us->us_smallest_anon_port; 6312 if (restart) 6313 return (0); 6314 restart = B_TRUE; 6315 } 6316 6317 if (port < us->us_smallest_nonpriv_port) 6318 port = us->us_smallest_nonpriv_port; 6319 6320 for (i = 0; i < us->us_num_epriv_ports; i++) { 6321 if (port == us->us_epriv_ports[i]) { 6322 port++; 6323 /* 6324 * Make sure that the port is in the 6325 * valid range. 6326 */ 6327 goto retry; 6328 } 6329 } 6330 6331 if (is_system_labeled() && 6332 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 6333 port, IPPROTO_UDP, B_TRUE)) != 0) { 6334 port = nextport; 6335 goto retry; 6336 } 6337 6338 return (port); 6339 } 6340 6341 static int 6342 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 6343 { 6344 int err; 6345 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 6346 udp_t *udp = Q_TO_UDP(wq); 6347 6348 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 6349 opt_storage, udp->udp_mac_exempt, 6350 udp->udp_us->us_netstack->netstack_ip); 6351 if (err == 0) { 6352 err = tsol_update_options(&udp->udp_ip_snd_options, 6353 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 6354 opt_storage); 6355 } 6356 if (err != 0) { 6357 DTRACE_PROBE4( 6358 tx__ip__log__info__updatelabel__udp, 6359 char *, "queue(1) failed to update options(2) on mp(3)", 6360 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6361 } else { 6362 IN6_IPADDR_TO_V4MAPPED(dst, 
&udp->udp_v6lastdst); 6363 } 6364 return (err); 6365 } 6366 6367 static mblk_t * 6368 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 6369 uint_t srcid, int *error) 6370 { 6371 udp_t *udp = connp->conn_udp; 6372 queue_t *q = connp->conn_wq; 6373 mblk_t *mp1 = mp; 6374 mblk_t *mp2; 6375 ipha_t *ipha; 6376 int ip_hdr_length; 6377 uint32_t ip_len; 6378 udpha_t *udpha; 6379 udpattrs_t attrs; 6380 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 6381 uint32_t ip_snd_opt_len = 0; 6382 ip4_pkt_t pktinfo; 6383 ip4_pkt_t *pktinfop = &pktinfo; 6384 ip_opt_info_t optinfo; 6385 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6386 udp_stack_t *us = udp->udp_us; 6387 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 6388 6389 6390 *error = 0; 6391 pktinfop->ip4_ill_index = 0; 6392 pktinfop->ip4_addr = INADDR_ANY; 6393 optinfo.ip_opt_flags = 0; 6394 optinfo.ip_opt_ill_index = 0; 6395 6396 if (v4dst == INADDR_ANY) 6397 v4dst = htonl(INADDR_LOOPBACK); 6398 6399 /* 6400 * If options passed in, feed it for verification and handling 6401 */ 6402 attrs.udpattr_credset = B_FALSE; 6403 if (DB_TYPE(mp) != M_DATA) { 6404 mp1 = mp->b_cont; 6405 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 6406 attrs.udpattr_ipp4 = pktinfop; 6407 attrs.udpattr_mb = mp; 6408 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 6409 goto done; 6410 /* 6411 * Note: success in processing options. 6412 * mp option buffer represented by 6413 * OPT_length/offset now potentially modified 6414 * and contain option setting results 6415 */ 6416 ASSERT(*error == 0); 6417 } 6418 } 6419 6420 /* mp1 points to the M_DATA mblk carrying the packet */ 6421 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6422 6423 /* 6424 * Check if our saved options are valid; update if not 6425 * TSOL Note: Since we are not in WRITER mode, UDP packets 6426 * to different destination may require different labels. 
6427 * We use conn_lock to ensure that lastdst, ip_snd_options, 6428 * and ip_snd_options_len are consistent for the current 6429 * destination and are updated atomically. 6430 */ 6431 mutex_enter(&connp->conn_lock); 6432 if (is_system_labeled()) { 6433 /* Using UDP MLP requires SCM_UCRED from user */ 6434 if (connp->conn_mlp_type != mlptSingle && 6435 !attrs.udpattr_credset) { 6436 mutex_exit(&connp->conn_lock); 6437 DTRACE_PROBE4( 6438 tx__ip__log__info__output__udp, 6439 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6440 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6441 *error = ECONNREFUSED; 6442 goto done; 6443 } 6444 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 6445 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst) && 6446 (*error = udp_update_label(q, mp, v4dst)) != 0) { 6447 mutex_exit(&connp->conn_lock); 6448 goto done; 6449 } 6450 } 6451 if (udp->udp_ip_snd_options_len > 0) { 6452 ip_snd_opt_len = udp->udp_ip_snd_options_len; 6453 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 6454 } 6455 mutex_exit(&connp->conn_lock); 6456 6457 /* Add an IP header */ 6458 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len; 6459 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 6460 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 6461 !OK_32PTR(ipha)) { 6462 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 6463 if (mp2 == NULL) { 6464 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6465 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 6466 *error = ENOMEM; 6467 goto done; 6468 } 6469 mp2->b_wptr = DB_LIM(mp2); 6470 mp2->b_cont = mp1; 6471 mp1 = mp2; 6472 if (DB_TYPE(mp) != M_DATA) 6473 mp->b_cont = mp1; 6474 else 6475 mp = mp1; 6476 6477 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 6478 } 6479 ip_hdr_length -= UDPH_SIZE; 6480 #ifdef _BIG_ENDIAN 6481 /* Set version, header length, and tos */ 6482 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 6483 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 6484 
udp->udp_type_of_service); 6485 /* Set ttl and protocol */ 6486 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 6487 #else 6488 /* Set version, header length, and tos */ 6489 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 6490 ((udp->udp_type_of_service << 8) | 6491 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 6492 /* Set ttl and protocol */ 6493 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 6494 #endif 6495 if (pktinfop->ip4_addr != INADDR_ANY) { 6496 ipha->ipha_src = pktinfop->ip4_addr; 6497 optinfo.ip_opt_flags = IP_VERIFY_SRC; 6498 } else { 6499 /* 6500 * Copy our address into the packet. If this is zero, 6501 * first look at __sin6_src_id for a hint. If we leave the 6502 * source as INADDR_ANY then ip will fill in the real source 6503 * address. 6504 */ 6505 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 6506 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 6507 in6_addr_t v6src; 6508 6509 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 6510 us->us_netstack); 6511 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 6512 } 6513 } 6514 6515 if (pktinfop->ip4_ill_index != 0) { 6516 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 6517 } 6518 6519 ipha->ipha_fragment_offset_and_flags = 0; 6520 ipha->ipha_ident = 0; 6521 6522 mp1->b_rptr = (uchar_t *)ipha; 6523 6524 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 6525 (uintptr_t)UINT_MAX); 6526 6527 /* Determine length of packet */ 6528 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 6529 if ((mp2 = mp1->b_cont) != NULL) { 6530 do { 6531 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6532 ip_len += (uint32_t)MBLKL(mp2); 6533 } while ((mp2 = mp2->b_cont) != NULL); 6534 } 6535 /* 6536 * If the size of the packet is greater than the maximum allowed by 6537 * ip, return an error. Passing this down could cause panics because 6538 * the size will have wrapped and be inconsistent with the msg size. 
6539 */ 6540 if (ip_len > IP_MAXPACKET) { 6541 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6542 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 6543 *error = EMSGSIZE; 6544 goto done; 6545 } 6546 ipha->ipha_length = htons((uint16_t)ip_len); 6547 ip_len -= ip_hdr_length; 6548 ip_len = htons((uint16_t)ip_len); 6549 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 6550 6551 /* 6552 * Copy in the destination address 6553 */ 6554 ipha->ipha_dst = v4dst; 6555 6556 /* 6557 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 6558 */ 6559 if (CLASSD(v4dst)) 6560 ipha->ipha_ttl = udp->udp_multicast_ttl; 6561 6562 udpha->uha_dst_port = port; 6563 udpha->uha_src_port = udp->udp_port; 6564 6565 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 6566 uint32_t cksum; 6567 6568 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 6569 /* 6570 * Massage source route putting first source route in ipha_dst. 6571 * Ignore the destination in T_unitdata_req. 6572 * Create a checksum adjustment for a source route, if any. 6573 */ 6574 cksum = ip_massage_options(ipha, us->us_netstack); 6575 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6576 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 6577 (ipha->ipha_dst & 0xFFFF); 6578 if ((int)cksum < 0) 6579 cksum--; 6580 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6581 /* 6582 * IP does the checksum if uha_checksum is non-zero, 6583 * We make it easy for IP to include our pseudo header 6584 * by putting our length in uha_checksum. 6585 */ 6586 cksum += ip_len; 6587 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6588 /* There might be a carry. 
*/ 6589 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6590 #ifdef _LITTLE_ENDIAN 6591 if (us->us_do_checksum) 6592 ip_len = (cksum << 16) | ip_len; 6593 #else 6594 if (us->us_do_checksum) 6595 ip_len = (ip_len << 16) | cksum; 6596 else 6597 ip_len <<= 16; 6598 #endif 6599 } else { 6600 /* 6601 * IP does the checksum if uha_checksum is non-zero, 6602 * We make it easy for IP to include our pseudo header 6603 * by putting our length in uha_checksum. 6604 */ 6605 if (us->us_do_checksum) 6606 ip_len |= (ip_len << 16); 6607 #ifndef _LITTLE_ENDIAN 6608 else 6609 ip_len <<= 16; 6610 #endif 6611 } 6612 6613 /* Set UDP length and checksum */ 6614 *((uint32_t *)&udpha->uha_length) = ip_len; 6615 if (DB_CRED(mp) != NULL) 6616 mblk_setcred(mp1, DB_CRED(mp)); 6617 6618 if (DB_TYPE(mp) != M_DATA) { 6619 ASSERT(mp != mp1); 6620 freeb(mp); 6621 } 6622 6623 /* mp has been consumed and we'll return success */ 6624 ASSERT(*error == 0); 6625 mp = NULL; 6626 6627 /* We're done. Pass the packet to ip. */ 6628 BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams); 6629 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6630 "udp_wput_end: q %p (%S)", q, "end"); 6631 6632 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 6633 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 6634 connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || 6635 connp->conn_nofailover_ill != NULL || 6636 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 6637 optinfo.ip_opt_ill_index != 0 || 6638 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 6639 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 6640 ipst->ips_ip_g_mrouter != NULL) { 6641 UDP_STAT(us, udp_ip_send); 6642 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 6643 &optinfo); 6644 } else { 6645 udp_send_data(udp, connp->conn_wq, mp1, ipha); 6646 } 6647 6648 done: 6649 if (*error != 0) { 6650 ASSERT(mp != NULL); 6651 BUMP_MIB(&udp->udp_mib, udpOutErrors); 6652 } 6653 return (mp); 6654 } 6655 6656 static void 6657 udp_send_data(udp_t *udp, queue_t *q, mblk_t 
*mp, ipha_t *ipha) 6658 { 6659 conn_t *connp = udp->udp_connp; 6660 ipaddr_t src, dst; 6661 ill_t *ill; 6662 ire_t *ire; 6663 ipif_t *ipif = NULL; 6664 mblk_t *ire_fp_mp; 6665 uint_t ire_fp_mp_len; 6666 uint16_t *up; 6667 uint32_t cksum, hcksum_txflags; 6668 queue_t *dev_q; 6669 boolean_t retry_caching; 6670 udp_stack_t *us = udp->udp_us; 6671 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6672 6673 dst = ipha->ipha_dst; 6674 src = ipha->ipha_src; 6675 ASSERT(ipha->ipha_ident == 0); 6676 6677 if (CLASSD(dst)) { 6678 int err; 6679 6680 ipif = conn_get_held_ipif(connp, 6681 &connp->conn_multicast_ipif, &err); 6682 6683 if (ipif == NULL || ipif->ipif_isv6 || 6684 (ipif->ipif_ill->ill_phyint->phyint_flags & 6685 PHYI_LOOPBACK)) { 6686 if (ipif != NULL) 6687 ipif_refrele(ipif); 6688 UDP_STAT(us, udp_ip_send); 6689 ip_output(connp, mp, q, IP_WPUT); 6690 return; 6691 } 6692 } 6693 6694 retry_caching = B_FALSE; 6695 mutex_enter(&connp->conn_lock); 6696 ire = connp->conn_ire_cache; 6697 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 6698 6699 if (ire == NULL || ire->ire_addr != dst || 6700 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 6701 retry_caching = B_TRUE; 6702 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 6703 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6704 6705 ASSERT(ipif != NULL); 6706 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6707 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6708 retry_caching = B_TRUE; 6709 } 6710 6711 if (!retry_caching) { 6712 ASSERT(ire != NULL); 6713 IRE_REFHOLD(ire); 6714 mutex_exit(&connp->conn_lock); 6715 } else { 6716 boolean_t cached = B_FALSE; 6717 6718 connp->conn_ire_cache = NULL; 6719 mutex_exit(&connp->conn_lock); 6720 6721 /* Release the old ire */ 6722 if (ire != NULL) { 6723 IRE_REFRELE_NOTR(ire); 6724 ire = NULL; 6725 } 6726 6727 if (CLASSD(dst)) { 6728 ASSERT(ipif != NULL); 6729 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6730 connp->conn_zoneid, MBLK_GETLABEL(mp), 6731 
MATCH_IRE_ILL_GROUP, ipst); 6732 } else { 6733 ASSERT(ipif == NULL); 6734 ire = ire_cache_lookup(dst, connp->conn_zoneid, 6735 MBLK_GETLABEL(mp), ipst); 6736 } 6737 6738 if (ire == NULL) { 6739 if (ipif != NULL) 6740 ipif_refrele(ipif); 6741 UDP_STAT(us, udp_ire_null); 6742 ip_output(connp, mp, q, IP_WPUT); 6743 return; 6744 } 6745 IRE_REFHOLD_NOTR(ire); 6746 6747 mutex_enter(&connp->conn_lock); 6748 if (!(connp->conn_state_flags & CONN_CLOSING) && 6749 connp->conn_ire_cache == NULL) { 6750 rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6751 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6752 connp->conn_ire_cache = ire; 6753 cached = B_TRUE; 6754 } 6755 rw_exit(&ire->ire_bucket->irb_lock); 6756 } 6757 mutex_exit(&connp->conn_lock); 6758 6759 /* 6760 * We can continue to use the ire but since it was not 6761 * cached, we should drop the extra reference. 6762 */ 6763 if (!cached) 6764 IRE_REFRELE_NOTR(ire); 6765 } 6766 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6767 ASSERT(!CLASSD(dst) || ipif != NULL); 6768 6769 /* 6770 * Check if we can take the fast-path. 
6771 * Note that "incomplete" ire's (where the link-layer for next hop 6772 * is not resolved, or where the fast-path header in nce_fp_mp is not 6773 * available yet) are sent down the legacy (slow) path 6774 */ 6775 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6776 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 6777 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 6778 (connp->conn_nexthop_set) || 6779 (ire->ire_nce == NULL) || 6780 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) || 6781 ((ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp))) { 6782 if (ipif != NULL) 6783 ipif_refrele(ipif); 6784 UDP_STAT(us, udp_ip_ire_send); 6785 IRE_REFRELE(ire); 6786 ip_output(connp, mp, q, IP_WPUT); 6787 return; 6788 } 6789 6790 ill = ire_to_ill(ire); 6791 ASSERT(ill != NULL); 6792 6793 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 6794 6795 dev_q = ire->ire_stq->q_next; 6796 ASSERT(dev_q != NULL); 6797 /* 6798 * If the service thread is already running, or if the driver 6799 * queue is currently flow-controlled, queue this packet. 
6800 */ 6801 if ((q->q_first != NULL || connp->conn_draining) || 6802 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 6803 if (ipst->ips_ip_output_queue) { 6804 (void) putq(q, mp); 6805 } else { 6806 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6807 freemsg(mp); 6808 } 6809 if (ipif != NULL) 6810 ipif_refrele(ipif); 6811 IRE_REFRELE(ire); 6812 return; 6813 } 6814 6815 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6816 #ifndef _BIG_ENDIAN 6817 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6818 #endif 6819 6820 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6821 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6822 src = ipha->ipha_src = ipif->ipif_src_addr; 6823 else 6824 src = ipha->ipha_src = ire->ire_src_addr; 6825 } 6826 6827 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6828 ASSERT(ill->ill_hcksum_capab != NULL); 6829 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6830 } else { 6831 hcksum_txflags = 0; 6832 } 6833 6834 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6835 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6836 6837 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6838 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6839 if (*up != 0) { 6840 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6841 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6842 ntohs(ipha->ipha_length), cksum); 6843 6844 /* Software checksum? 
*/ 6845 if (DB_CKSUMFLAGS(mp) == 0) { 6846 UDP_STAT(us, udp_out_sw_cksum); 6847 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 6848 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6849 } 6850 } 6851 6852 ipha->ipha_fragment_offset_and_flags |= 6853 (uint32_t)htons(ire->ire_frag_flag); 6854 6855 /* Calculate IP header checksum if hardware isn't capable */ 6856 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6857 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6858 ((uint16_t *)ipha)[4]); 6859 } 6860 6861 if (CLASSD(dst)) { 6862 ilm_t *ilm; 6863 6864 ILM_WALKER_HOLD(ill); 6865 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6866 ILM_WALKER_RELE(ill); 6867 if (ilm != NULL) { 6868 ip_multicast_loopback(q, ill, mp, 6869 connp->conn_multicast_loop ? 0 : 6870 IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); 6871 } 6872 6873 /* If multicast TTL is 0 then we are done */ 6874 if (ipha->ipha_ttl == 0) { 6875 if (ipif != NULL) 6876 ipif_refrele(ipif); 6877 freemsg(mp); 6878 IRE_REFRELE(ire); 6879 return; 6880 } 6881 } 6882 6883 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6884 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6885 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6886 6887 UPDATE_OB_PKT_COUNT(ire); 6888 ire->ire_last_used_time = lbolt; 6889 6890 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 6891 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 6892 ntohs(ipha->ipha_length)); 6893 6894 if (ILL_DLS_CAPABLE(ill)) { 6895 /* 6896 * Send the packet directly to DLD, where it may be queued 6897 * depending on the availability of transmit resources at 6898 * the media layer. 
6899 */ 6900 IP_DLS_ILL_TX(ill, ipha, mp, ipst); 6901 } else { 6902 DTRACE_PROBE4(ip4__physical__out__start, 6903 ill_t *, NULL, ill_t *, ill, 6904 ipha_t *, ipha, mblk_t *, mp); 6905 FW_HOOKS(ipst->ips_ip4_physical_out_event, 6906 ipst->ips_ipv4firewall_physical_out, 6907 NULL, ill, ipha, mp, mp, ipst); 6908 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 6909 if (mp != NULL) 6910 putnext(ire->ire_stq, mp); 6911 } 6912 6913 if (ipif != NULL) 6914 ipif_refrele(ipif); 6915 IRE_REFRELE(ire); 6916 } 6917 6918 static boolean_t 6919 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 6920 { 6921 udp_t *udp = Q_TO_UDP(wq); 6922 int err; 6923 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 6924 6925 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 6926 dst, opt_storage, udp->udp_mac_exempt, 6927 udp->udp_us->us_netstack->netstack_ip); 6928 if (err == 0) { 6929 err = tsol_update_sticky(&udp->udp_sticky_ipp, 6930 &udp->udp_label_len_v6, opt_storage); 6931 } 6932 if (err != 0) { 6933 DTRACE_PROBE4( 6934 tx__ip__log__drop__updatelabel__udp6, 6935 char *, "queue(1) failed to update options(2) on mp(3)", 6936 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6937 } else { 6938 udp->udp_v6lastdst = *dst; 6939 } 6940 return (err); 6941 } 6942 6943 /* 6944 * This routine handles all messages passed downstream. It either 6945 * consumes the message or passes it downstream; it never queues a 6946 * a message. 
6947 */ 6948 static void 6949 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6950 { 6951 sin6_t *sin6; 6952 sin_t *sin; 6953 ipaddr_t v4dst; 6954 uint16_t port; 6955 uint_t srcid; 6956 queue_t *q = connp->conn_wq; 6957 udp_t *udp = connp->conn_udp; 6958 int error = 0; 6959 struct sockaddr_storage ss; 6960 udp_stack_t *us = udp->udp_us; 6961 6962 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6963 "udp_wput_start: connp %p mp %p", connp, mp); 6964 6965 /* 6966 * We directly handle several cases here: T_UNITDATA_REQ message 6967 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 6968 * connected and non-connected socket. The latter carries the 6969 * address structure along when this routine gets called. 6970 */ 6971 switch (DB_TYPE(mp)) { 6972 case M_DATA: 6973 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6974 if (!udp->udp_direct_sockfs || 6975 addr == NULL || addrlen == 0) { 6976 /* Not connected; address is required */ 6977 BUMP_MIB(&udp->udp_mib, udpOutErrors); 6978 UDP_STAT(us, udp_out_err_notconn); 6979 freemsg(mp); 6980 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6981 "udp_wput_end: connp %p (%S)", connp, 6982 "not-connected; address required"); 6983 return; 6984 } 6985 ASSERT(udp->udp_issocket); 6986 UDP_DBGSTAT(us, udp_data_notconn); 6987 /* Not connected; do some more checks below */ 6988 break; 6989 } 6990 /* M_DATA for connected socket */ 6991 UDP_DBGSTAT(us, udp_data_conn); 6992 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6993 6994 /* Initialize addr and addrlen as if they're passed in */ 6995 if (udp->udp_family == AF_INET) { 6996 sin = (sin_t *)&ss; 6997 sin->sin_family = AF_INET; 6998 sin->sin_port = udp->udp_dstport; 6999 sin->sin_addr.s_addr = v4dst; 7000 addr = (struct sockaddr *)sin; 7001 addrlen = sizeof (*sin); 7002 } else { 7003 sin6 = (sin6_t *)&ss; 7004 sin6->sin6_family = AF_INET6; 7005 sin6->sin6_port = udp->udp_dstport; 7006 sin6->sin6_flowinfo = udp->udp_flowinfo; 7007 
sin6->sin6_addr = udp->udp_v6dst; 7008 sin6->sin6_scope_id = 0; 7009 sin6->__sin6_src_id = 0; 7010 addr = (struct sockaddr *)sin6; 7011 addrlen = sizeof (*sin6); 7012 } 7013 7014 if (udp->udp_family == AF_INET || 7015 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 7016 /* 7017 * Handle both AF_INET and AF_INET6; the latter 7018 * for IPV4 mapped destination addresses. Note 7019 * here that both addr and addrlen point to the 7020 * corresponding struct depending on the address 7021 * family of the socket. 7022 */ 7023 mp = udp_output_v4(connp, mp, v4dst, 7024 udp->udp_dstport, 0, &error); 7025 } else { 7026 mp = udp_output_v6(connp, mp, sin6, &error); 7027 } 7028 if (error != 0) { 7029 ASSERT(addr != NULL && addrlen != 0); 7030 goto ud_error; 7031 } 7032 return; 7033 case M_PROTO: 7034 case M_PCPROTO: { 7035 struct T_unitdata_req *tudr; 7036 7037 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 7038 tudr = (struct T_unitdata_req *)mp->b_rptr; 7039 7040 /* Handle valid T_UNITDATA_REQ here */ 7041 if (MBLKL(mp) >= sizeof (*tudr) && 7042 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 7043 if (mp->b_cont == NULL) { 7044 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7045 "udp_wput_end: q %p (%S)", q, "badaddr"); 7046 error = EPROTO; 7047 goto ud_error; 7048 } 7049 7050 if (!MBLKIN(mp, 0, tudr->DEST_offset + 7051 tudr->DEST_length)) { 7052 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7053 "udp_wput_end: q %p (%S)", q, "badaddr"); 7054 error = EADDRNOTAVAIL; 7055 goto ud_error; 7056 } 7057 /* 7058 * If a port has not been bound to the stream, fail. 7059 * This is not a problem when sockfs is directly 7060 * above us, because it will ensure that the socket 7061 * is first bound before allowing data to be sent. 
7062 */ 7063 if (udp->udp_state == TS_UNBND) { 7064 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7065 "udp_wput_end: q %p (%S)", q, "outstate"); 7066 error = EPROTO; 7067 goto ud_error; 7068 } 7069 addr = (struct sockaddr *) 7070 &mp->b_rptr[tudr->DEST_offset]; 7071 addrlen = tudr->DEST_length; 7072 if (tudr->OPT_length != 0) 7073 UDP_STAT(us, udp_out_opt); 7074 break; 7075 } 7076 /* FALLTHRU */ 7077 } 7078 default: 7079 udp_become_writer(connp, mp, udp_wput_other_wrapper, 7080 SQTAG_UDP_OUTPUT); 7081 return; 7082 } 7083 ASSERT(addr != NULL); 7084 7085 switch (udp->udp_family) { 7086 case AF_INET6: 7087 sin6 = (sin6_t *)addr; 7088 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 7089 sin6->sin6_family != AF_INET6) { 7090 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7091 "udp_wput_end: q %p (%S)", q, "badaddr"); 7092 error = EADDRNOTAVAIL; 7093 goto ud_error; 7094 } 7095 7096 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 7097 /* 7098 * Destination is a non-IPv4-compatible IPv6 address. 7099 * Send out an IPv6 format packet. 7100 */ 7101 mp = udp_output_v6(connp, mp, sin6, &error); 7102 if (error != 0) 7103 goto ud_error; 7104 7105 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7106 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 7107 return; 7108 } 7109 /* 7110 * If the local address is not zero or a mapped address 7111 * return an error. It would be possible to send an IPv4 7112 * packet but the response would never make it back to the 7113 * application since it is bound to a non-mapped address. 
7114 */ 7115 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 7116 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7117 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7118 "udp_wput_end: q %p (%S)", q, "badaddr"); 7119 error = EADDRNOTAVAIL; 7120 goto ud_error; 7121 } 7122 /* Send IPv4 packet without modifying udp_ipversion */ 7123 /* Extract port and ipaddr */ 7124 port = sin6->sin6_port; 7125 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 7126 srcid = sin6->__sin6_src_id; 7127 break; 7128 7129 case AF_INET: 7130 sin = (sin_t *)addr; 7131 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 7132 sin->sin_family != AF_INET) { 7133 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7134 "udp_wput_end: q %p (%S)", q, "badaddr"); 7135 error = EADDRNOTAVAIL; 7136 goto ud_error; 7137 } 7138 /* Extract port and ipaddr */ 7139 port = sin->sin_port; 7140 v4dst = sin->sin_addr.s_addr; 7141 srcid = 0; 7142 break; 7143 } 7144 7145 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); 7146 if (error != 0) { 7147 ud_error: 7148 UDP_STAT(us, udp_out_err_output); 7149 ASSERT(mp != NULL); 7150 /* mp is freed by the following routine */ 7151 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 7152 (t_scalar_t)error); 7153 } 7154 } 7155 7156 /* ARGSUSED */ 7157 static void 7158 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 7159 { 7160 udp_output((conn_t *)arg, mp, NULL, 0); 7161 _UDP_EXIT((conn_t *)arg); 7162 } 7163 7164 static void 7165 udp_wput(queue_t *q, mblk_t *mp) 7166 { 7167 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 7168 SQTAG_UDP_WPUT); 7169 } 7170 7171 /* 7172 * Allocate and prepare a T_UNITDATA_REQ message. 
7173 */ 7174 static mblk_t * 7175 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 7176 { 7177 struct T_unitdata_req *tudr; 7178 mblk_t *mp; 7179 7180 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 7181 if (mp != NULL) { 7182 mp->b_wptr += sizeof (*tudr) + addrlen; 7183 DB_TYPE(mp) = M_PROTO; 7184 7185 tudr = (struct T_unitdata_req *)mp->b_rptr; 7186 tudr->PRIM_type = T_UNITDATA_REQ; 7187 tudr->DEST_length = addrlen; 7188 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 7189 tudr->OPT_length = 0; 7190 tudr->OPT_offset = 0; 7191 bcopy(addr, tudr+1, addrlen); 7192 } 7193 return (mp); 7194 } 7195 7196 /* 7197 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 7198 * is valid when we are directly beneath the stream head, and thus sockfs 7199 * is able to bypass STREAMS and directly call us, passing along the sockaddr 7200 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 7201 * this is done for both connected and non-connected endpoint. 7202 */ 7203 void 7204 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 7205 { 7206 conn_t *connp; 7207 udp_t *udp; 7208 udp_stack_t *us; 7209 7210 q = UDP_WR(q); 7211 connp = Q_TO_CONN(q); 7212 udp = connp->conn_udp; 7213 us = udp->udp_us; 7214 7215 /* udpsockfs should only send down M_DATA for this entry point */ 7216 ASSERT(DB_TYPE(mp) == M_DATA); 7217 7218 mutex_enter(&connp->conn_lock); 7219 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 7220 7221 if (udp->udp_mode != UDP_MT_HOT) { 7222 /* 7223 * We can't enter this conn right away because another 7224 * thread is currently executing as writer; therefore we 7225 * need to deposit the message into the squeue to be 7226 * drained later. If a socket address is present, we 7227 * need to create a T_UNITDATA_REQ message as placeholder. 
7228 */ 7229 if (addr != NULL && addrlen != 0) { 7230 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 7231 7232 if (tudr_mp == NULL) { 7233 mutex_exit(&connp->conn_lock); 7234 BUMP_MIB(&udp->udp_mib, udpOutErrors); 7235 UDP_STAT(us, udp_out_err_tudr); 7236 freemsg(mp); 7237 return; 7238 } 7239 /* Tag the packet with T_UNITDATA_REQ */ 7240 tudr_mp->b_cont = mp; 7241 mp = tudr_mp; 7242 } 7243 mutex_exit(&connp->conn_lock); 7244 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 7245 return; 7246 } 7247 7248 /* We can execute as reader right away. */ 7249 UDP_READERS_INCREF(udp); 7250 mutex_exit(&connp->conn_lock); 7251 7252 udp_output(connp, mp, addr, addrlen); 7253 7254 udp_exit(connp); 7255 } 7256 7257 /* 7258 * udp_output_v6(): 7259 * Assumes that udp_wput did some sanity checking on the destination 7260 * address. 7261 */ 7262 static mblk_t * 7263 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) 7264 { 7265 ip6_t *ip6h; 7266 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 7267 mblk_t *mp1 = mp; 7268 mblk_t *mp2; 7269 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7270 size_t ip_len; 7271 udpha_t *udph; 7272 udp_t *udp = connp->conn_udp; 7273 queue_t *q = connp->conn_wq; 7274 ip6_pkt_t ipp_s; /* For ancillary data options */ 7275 ip6_pkt_t *ipp = &ipp_s; 7276 ip6_pkt_t *tipp; /* temporary ipp */ 7277 uint32_t csum = 0; 7278 uint_t ignore = 0; 7279 uint_t option_exists = 0, is_sticky = 0; 7280 uint8_t *cp; 7281 uint8_t *nxthdr_ptr; 7282 in6_addr_t ip6_dst; 7283 udpattrs_t attrs; 7284 boolean_t opt_present; 7285 ip6_hbh_t *hopoptsptr = NULL; 7286 uint_t hopoptslen = 0; 7287 boolean_t is_ancillary = B_FALSE; 7288 udp_stack_t *us = udp->udp_us; 7289 7290 *error = 0; 7291 7292 /* 7293 * If the local address is a mapped address return 7294 * an error. 7295 * It would be possible to send an IPv6 packet but the 7296 * response would never make it back to the application 7297 * since it is bound to a mapped address. 
7298 */ 7299 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 7300 *error = EADDRNOTAVAIL; 7301 goto done; 7302 } 7303 7304 ipp->ipp_fields = 0; 7305 ipp->ipp_sticky_ignored = 0; 7306 7307 /* 7308 * If TPI options passed in, feed it for verification and handling 7309 */ 7310 attrs.udpattr_credset = B_FALSE; 7311 opt_present = B_FALSE; 7312 if (DB_TYPE(mp) != M_DATA) { 7313 mp1 = mp->b_cont; 7314 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 7315 attrs.udpattr_ipp6 = ipp; 7316 attrs.udpattr_mb = mp; 7317 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 7318 goto done; 7319 ASSERT(*error == 0); 7320 opt_present = B_TRUE; 7321 } 7322 } 7323 ignore = ipp->ipp_sticky_ignored; 7324 7325 /* mp1 points to the M_DATA mblk carrying the packet */ 7326 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 7327 7328 if (sin6->sin6_scope_id != 0 && 7329 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 7330 /* 7331 * IPPF_SCOPE_ID is special. It's neither a sticky 7332 * option nor ancillary data. It needs to be 7333 * explicitly set in options_exists. 7334 */ 7335 option_exists |= IPPF_SCOPE_ID; 7336 } 7337 7338 /* 7339 * Compute the destination address 7340 */ 7341 ip6_dst = sin6->sin6_addr; 7342 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 7343 ip6_dst = ipv6_loopback; 7344 7345 /* 7346 * If we're not going to the same destination as last time, then 7347 * recompute the label required. This is done in a separate routine to 7348 * avoid blowing up our stack here. 7349 * 7350 * TSOL Note: Since we are not in WRITER mode, UDP packets 7351 * to different destination may require different labels. 7352 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 7353 * and sticky ipp_hopoptslen are consistent for the current 7354 * destination and are updated atomically. 
7355 */ 7356 mutex_enter(&connp->conn_lock); 7357 if (is_system_labeled()) { 7358 /* Using UDP MLP requires SCM_UCRED from user */ 7359 if (connp->conn_mlp_type != mlptSingle && 7360 !attrs.udpattr_credset) { 7361 DTRACE_PROBE4( 7362 tx__ip__log__info__output__udp6, 7363 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 7364 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 7365 *error = ECONNREFUSED; 7366 mutex_exit(&connp->conn_lock); 7367 goto done; 7368 } 7369 if ((opt_present || 7370 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst)) && 7371 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 7372 mutex_exit(&connp->conn_lock); 7373 goto done; 7374 } 7375 } 7376 7377 /* 7378 * If there's a security label here, then we ignore any options the 7379 * user may try to set. We keep the peer's label as a hidden sticky 7380 * option. We make a private copy of this label before releasing the 7381 * lock so that label is kept consistent with the destination addr. 7382 */ 7383 if (udp->udp_label_len_v6 > 0) { 7384 ignore &= ~IPPF_HOPOPTS; 7385 ipp->ipp_fields &= ~IPPF_HOPOPTS; 7386 } 7387 7388 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 7389 /* No sticky options nor ancillary data. */ 7390 mutex_exit(&connp->conn_lock); 7391 goto no_options; 7392 } 7393 7394 /* 7395 * Go through the options figuring out where each is going to 7396 * come from and build two masks. The first mask indicates if 7397 * the option exists at all. The second mask indicates if the 7398 * option is sticky or ancillary. 
7399 */ 7400 if (!(ignore & IPPF_HOPOPTS)) { 7401 if (ipp->ipp_fields & IPPF_HOPOPTS) { 7402 option_exists |= IPPF_HOPOPTS; 7403 udp_ip_hdr_len += ipp->ipp_hopoptslen; 7404 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 7405 option_exists |= IPPF_HOPOPTS; 7406 is_sticky |= IPPF_HOPOPTS; 7407 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 7408 hopoptsptr = kmem_alloc( 7409 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 7410 if (hopoptsptr == NULL) { 7411 *error = ENOMEM; 7412 mutex_exit(&connp->conn_lock); 7413 goto done; 7414 } 7415 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 7416 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 7417 hopoptslen); 7418 udp_ip_hdr_len += hopoptslen; 7419 } 7420 } 7421 mutex_exit(&connp->conn_lock); 7422 7423 if (!(ignore & IPPF_RTHDR)) { 7424 if (ipp->ipp_fields & IPPF_RTHDR) { 7425 option_exists |= IPPF_RTHDR; 7426 udp_ip_hdr_len += ipp->ipp_rthdrlen; 7427 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 7428 option_exists |= IPPF_RTHDR; 7429 is_sticky |= IPPF_RTHDR; 7430 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 7431 } 7432 } 7433 7434 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 7435 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 7436 option_exists |= IPPF_RTDSTOPTS; 7437 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 7438 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 7439 option_exists |= IPPF_RTDSTOPTS; 7440 is_sticky |= IPPF_RTDSTOPTS; 7441 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 7442 } 7443 } 7444 7445 if (!(ignore & IPPF_DSTOPTS)) { 7446 if (ipp->ipp_fields & IPPF_DSTOPTS) { 7447 option_exists |= IPPF_DSTOPTS; 7448 udp_ip_hdr_len += ipp->ipp_dstoptslen; 7449 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 7450 option_exists |= IPPF_DSTOPTS; 7451 is_sticky |= IPPF_DSTOPTS; 7452 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 7453 } 7454 } 7455 7456 if (!(ignore & IPPF_IFINDEX)) { 7457 if (ipp->ipp_fields & IPPF_IFINDEX) { 7458 
option_exists |= IPPF_IFINDEX; 7459 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 7460 option_exists |= IPPF_IFINDEX; 7461 is_sticky |= IPPF_IFINDEX; 7462 } 7463 } 7464 7465 if (!(ignore & IPPF_ADDR)) { 7466 if (ipp->ipp_fields & IPPF_ADDR) { 7467 option_exists |= IPPF_ADDR; 7468 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 7469 option_exists |= IPPF_ADDR; 7470 is_sticky |= IPPF_ADDR; 7471 } 7472 } 7473 7474 if (!(ignore & IPPF_DONTFRAG)) { 7475 if (ipp->ipp_fields & IPPF_DONTFRAG) { 7476 option_exists |= IPPF_DONTFRAG; 7477 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 7478 option_exists |= IPPF_DONTFRAG; 7479 is_sticky |= IPPF_DONTFRAG; 7480 } 7481 } 7482 7483 if (!(ignore & IPPF_USE_MIN_MTU)) { 7484 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 7485 option_exists |= IPPF_USE_MIN_MTU; 7486 } else if (udp->udp_sticky_ipp.ipp_fields & 7487 IPPF_USE_MIN_MTU) { 7488 option_exists |= IPPF_USE_MIN_MTU; 7489 is_sticky |= IPPF_USE_MIN_MTU; 7490 } 7491 } 7492 7493 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 7494 option_exists |= IPPF_HOPLIMIT; 7495 /* IPV6_HOPLIMIT can never be sticky */ 7496 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 7497 7498 if (!(ignore & IPPF_UNICAST_HOPS) && 7499 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 7500 option_exists |= IPPF_UNICAST_HOPS; 7501 is_sticky |= IPPF_UNICAST_HOPS; 7502 } 7503 7504 if (!(ignore & IPPF_MULTICAST_HOPS) && 7505 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 7506 option_exists |= IPPF_MULTICAST_HOPS; 7507 is_sticky |= IPPF_MULTICAST_HOPS; 7508 } 7509 7510 if (!(ignore & IPPF_TCLASS)) { 7511 if (ipp->ipp_fields & IPPF_TCLASS) { 7512 option_exists |= IPPF_TCLASS; 7513 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 7514 option_exists |= IPPF_TCLASS; 7515 is_sticky |= IPPF_TCLASS; 7516 } 7517 } 7518 7519 if (!(ignore & IPPF_NEXTHOP) && 7520 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 7521 option_exists |= 
IPPF_NEXTHOP; 7522 is_sticky |= IPPF_NEXTHOP; 7523 } 7524 7525 no_options: 7526 7527 /* 7528 * If any options carried in the ip6i_t were specified, we 7529 * need to account for the ip6i_t in the data we'll be sending 7530 * down. 7531 */ 7532 if (option_exists & IPPF_HAS_IP6I) 7533 udp_ip_hdr_len += sizeof (ip6i_t); 7534 7535 /* check/fix buffer config, setup pointers into it */ 7536 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 7537 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 7538 !OK_32PTR(ip6h)) { 7539 /* Try to get everything in a single mblk next time */ 7540 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 7541 udp->udp_max_hdr_len = udp_ip_hdr_len; 7542 (void) mi_set_sth_wroff(UDP_RD(q), 7543 udp->udp_max_hdr_len + us->us_wroff_extra); 7544 } 7545 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 7546 if (mp2 == NULL) { 7547 *error = ENOMEM; 7548 goto done; 7549 } 7550 mp2->b_wptr = DB_LIM(mp2); 7551 mp2->b_cont = mp1; 7552 mp1 = mp2; 7553 if (DB_TYPE(mp) != M_DATA) 7554 mp->b_cont = mp1; 7555 else 7556 mp = mp1; 7557 7558 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 7559 } 7560 mp1->b_rptr = (unsigned char *)ip6h; 7561 ip6i = (ip6i_t *)ip6h; 7562 7563 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 7564 if (option_exists & IPPF_HAS_IP6I) { 7565 ip6h = (ip6_t *)&ip6i[1]; 7566 ip6i->ip6i_flags = 0; 7567 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7568 7569 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 7570 if (option_exists & IPPF_SCOPE_ID) { 7571 ip6i->ip6i_flags |= IP6I_IFINDEX; 7572 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 7573 } else if (option_exists & IPPF_IFINDEX) { 7574 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 7575 ASSERT(tipp->ipp_ifindex != 0); 7576 ip6i->ip6i_flags |= IP6I_IFINDEX; 7577 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 7578 } 7579 7580 if (option_exists & IPPF_ADDR) { 7581 /* 7582 * Enable per-packet source address verification if 7583 * IPV6_PKTINFO specified the source address. 7584 * ip6_src is set in the transport's _wput function. 7585 */ 7586 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 7587 } 7588 7589 if (option_exists & IPPF_DONTFRAG) { 7590 ip6i->ip6i_flags |= IP6I_DONTFRAG; 7591 } 7592 7593 if (option_exists & IPPF_USE_MIN_MTU) { 7594 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 7595 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 7596 } 7597 7598 if (option_exists & IPPF_NEXTHOP) { 7599 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 7600 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 7601 ip6i->ip6i_flags |= IP6I_NEXTHOP; 7602 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 7603 } 7604 7605 /* 7606 * tell IP this is an ip6i_t private header 7607 */ 7608 ip6i->ip6i_nxt = IPPROTO_RAW; 7609 } 7610 7611 /* Initialize IPv6 header */ 7612 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7613 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7614 7615 /* Set the hoplimit of the outgoing packet. */ 7616 if (option_exists & IPPF_HOPLIMIT) { 7617 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 7618 ip6h->ip6_hops = ipp->ipp_hoplimit; 7619 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7620 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7621 ip6h->ip6_hops = udp->udp_multicast_ttl; 7622 if (option_exists & IPPF_MULTICAST_HOPS) 7623 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7624 } else { 7625 ip6h->ip6_hops = udp->udp_ttl; 7626 if (option_exists & IPPF_UNICAST_HOPS) 7627 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7628 } 7629 7630 if (option_exists & IPPF_ADDR) { 7631 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7632 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7633 ip6h->ip6_src = tipp->ipp_addr; 7634 } else { 7635 /* 7636 * The source address was not set using IPV6_PKTINFO. 7637 * First look at the bound source. 7638 * If unspecified fallback to __sin6_src_id. 7639 */ 7640 ip6h->ip6_src = udp->udp_v6src; 7641 if (sin6->__sin6_src_id != 0 && 7642 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7643 ip_srcid_find_id(sin6->__sin6_src_id, 7644 &ip6h->ip6_src, connp->conn_zoneid, 7645 us->us_netstack); 7646 } 7647 } 7648 7649 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7650 cp = (uint8_t *)&ip6h[1]; 7651 7652 /* 7653 * Here's where we have to start stringing together 7654 * any extension headers in the right order: 7655 * Hop-by-hop, destination, routing, and final destination opts. 
7656 */ 7657 if (option_exists & IPPF_HOPOPTS) { 7658 /* Hop-by-hop options */ 7659 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7660 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7661 if (hopoptslen == 0) { 7662 hopoptsptr = tipp->ipp_hopopts; 7663 hopoptslen = tipp->ipp_hopoptslen; 7664 is_ancillary = B_TRUE; 7665 } 7666 7667 *nxthdr_ptr = IPPROTO_HOPOPTS; 7668 nxthdr_ptr = &hbh->ip6h_nxt; 7669 7670 bcopy(hopoptsptr, cp, hopoptslen); 7671 cp += hopoptslen; 7672 7673 if (hopoptsptr != NULL && !is_ancillary) { 7674 kmem_free(hopoptsptr, hopoptslen); 7675 hopoptsptr = NULL; 7676 hopoptslen = 0; 7677 } 7678 } 7679 /* 7680 * En-route destination options 7681 * Only do them if there's a routing header as well 7682 */ 7683 if (option_exists & IPPF_RTDSTOPTS) { 7684 ip6_dest_t *dst = (ip6_dest_t *)cp; 7685 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7686 7687 *nxthdr_ptr = IPPROTO_DSTOPTS; 7688 nxthdr_ptr = &dst->ip6d_nxt; 7689 7690 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7691 cp += tipp->ipp_rtdstoptslen; 7692 } 7693 /* 7694 * Routing header next 7695 */ 7696 if (option_exists & IPPF_RTHDR) { 7697 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7698 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7699 7700 *nxthdr_ptr = IPPROTO_ROUTING; 7701 nxthdr_ptr = &rt->ip6r_nxt; 7702 7703 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7704 cp += tipp->ipp_rthdrlen; 7705 } 7706 /* 7707 * Do ultimate destination options 7708 */ 7709 if (option_exists & IPPF_DSTOPTS) { 7710 ip6_dest_t *dest = (ip6_dest_t *)cp; 7711 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7712 7713 *nxthdr_ptr = IPPROTO_DSTOPTS; 7714 nxthdr_ptr = &dest->ip6d_nxt; 7715 7716 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7717 cp += tipp->ipp_dstoptslen; 7718 } 7719 /* 7720 * Now set the last header pointer to the proto passed in 7721 */ 7722 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7723 *nxthdr_ptr = IPPROTO_UDP; 7724 7725 /* Update UDP header */ 7726 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 7727 udph->uha_dst_port = sin6->sin6_port; 7728 udph->uha_src_port = udp->udp_port; 7729 7730 /* 7731 * Copy in the destination address 7732 */ 7733 ip6h->ip6_dst = ip6_dst; 7734 7735 ip6h->ip6_vcf = 7736 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 7737 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7738 7739 if (option_exists & IPPF_TCLASS) { 7740 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7741 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7742 tipp->ipp_tclass); 7743 } 7744 7745 if (option_exists & IPPF_RTHDR) { 7746 ip6_rthdr_t *rth; 7747 7748 /* 7749 * Perform any processing needed for source routing. 7750 * We know that all extension headers will be in the same mblk 7751 * as the IPv6 header. 7752 */ 7753 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7754 if (rth != NULL && rth->ip6r_segleft != 0) { 7755 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7756 /* 7757 * Drop packet - only support Type 0 routing. 7758 * Notify the application as well. 7759 */ 7760 *error = EPROTO; 7761 goto done; 7762 } 7763 7764 /* 7765 * rth->ip6r_len is twice the number of 7766 * addresses in the header. Thus it must be even. 7767 */ 7768 if (rth->ip6r_len & 0x1) { 7769 *error = EPROTO; 7770 goto done; 7771 } 7772 /* 7773 * Shuffle the routing header and ip6_dst 7774 * addresses, and get the checksum difference 7775 * between the first hop (in ip6_dst) and 7776 * the destination (in the last routing hdr entry). 7777 */ 7778 csum = ip_massage_options_v6(ip6h, rth, 7779 us->us_netstack); 7780 /* 7781 * Verify that the first hop isn't a mapped address. 7782 * Routers along the path need to do this verification 7783 * for subsequent hops. 
7784 */ 7785 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7786 *error = EADDRNOTAVAIL; 7787 goto done; 7788 } 7789 7790 cp += (rth->ip6r_len + 1)*8; 7791 } 7792 } 7793 7794 /* count up length of UDP packet */ 7795 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7796 if ((mp2 = mp1->b_cont) != NULL) { 7797 do { 7798 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7799 ip_len += (uint32_t)MBLKL(mp2); 7800 } while ((mp2 = mp2->b_cont) != NULL); 7801 } 7802 7803 /* 7804 * If the size of the packet is greater than the maximum allowed by 7805 * ip, return an error. Passing this down could cause panics because 7806 * the size will have wrapped and be inconsistent with the msg size. 7807 */ 7808 if (ip_len > IP_MAXPACKET) { 7809 *error = EMSGSIZE; 7810 goto done; 7811 } 7812 7813 /* Store the UDP length. Subtract length of extension hdrs */ 7814 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7815 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7816 7817 /* 7818 * We make it easy for IP to include our pseudo header 7819 * by putting our length in uh_checksum, modified (if 7820 * we have a routing header) by the checksum difference 7821 * between the ultimate destination and first hop addresses. 7822 * Note: UDP over IPv6 must always checksum the packet. 7823 */ 7824 csum += udph->uha_length; 7825 csum = (csum & 0xFFFF) + (csum >> 16); 7826 udph->uha_checksum = (uint16_t)csum; 7827 7828 #ifdef _LITTLE_ENDIAN 7829 ip_len = htons(ip_len); 7830 #endif 7831 ip6h->ip6_plen = ip_len; 7832 if (DB_CRED(mp) != NULL) 7833 mblk_setcred(mp1, DB_CRED(mp)); 7834 7835 if (DB_TYPE(mp) != M_DATA) { 7836 ASSERT(mp != mp1); 7837 freeb(mp); 7838 } 7839 7840 /* mp has been consumed and we'll return success */ 7841 ASSERT(*error == 0); 7842 mp = NULL; 7843 7844 /* We're done. 
Pass the packet to IP */
	BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams);
	ip_output_v6(connp, mp1, q, IP_WPUT);

done:
	if (hopoptsptr != NULL && !is_ancillary) {
		kmem_free(hopoptsptr, hopoptslen);
		hopoptsptr = NULL;
	}
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&udp->udp_mib, udpOutErrors);
	}
	return (mp);
}

/*
 * Write-side handler for the messages dispatched here by message type:
 * TPI primitives (M_PROTO/M_PCPROTO), M_FLUSH, M_IOCTL and M_IOCDATA.
 *
 * Every case that fully handles the message returns directly after
 * emitting its "end" trace record.  Any case that merely "break"s out of
 * the switch (unknown TPI primitive, M_FLUSH, unhandled ioctl, unknown
 * message type) falls through to the bottom of the function, where the
 * message is handed to ip_output().
 */
static void
udp_wput_other(queue_t *q, mblk_t *mp)
{
	uchar_t	*rptr = mp->b_rptr;
	struct datab *db;
	struct iocblk *iocp;
	cred_t	*cr;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;
	udp_stack_t *us;

	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
	    "udp_wput_other_start: q %p", q);

	us = udp->udp_us;
	db = mp->b_datap;

	/* Credential passed to the option-management routines below. */
	cr = DB_CREDDEF(mp, connp->conn_cred);

	switch (db->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		/* Too short to even hold the primitive type: drop it. */
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			freemsg(mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "protoshort");
			return;
		}
		switch (((t_primp_t)rptr)->type) {
		case T_ADDR_REQ:
			udp_addr_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "addrreq");
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			udp_bind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "bindreq");
			return;
		case T_CONN_REQ:
			udp_connect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "connreq");
			return;
		case T_CAPABILITY_REQ:
			udp_capability_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "capabreq");
			return;
		case T_INFO_REQ:
			udp_info_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "unitdatareq");
			return;
		case T_UNBIND_REQ:
			udp_unbind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
			return;
		case T_SVR4_OPTMGMT_REQ:
			/*
			 * snmpcom_req() gets the first look at the request;
			 * only when it returns zero is the request run
			 * through the regular option processing.
			 */
			if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr))
				/*
				 * Use upper queue for option processing in
				 * case the request is not handled at this
				 * level and needs to be passed down to IP.
				 */
				(void) svr4_optcom_req(_WR(UDP_RD(q)),
				    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "optmgmtreq");
			return;

		case T_OPTMGMT_REQ:
			/*
			 * Use upper queue for option processing in
			 * case the request is not handled at this
			 * level and needs to be passed down to IP.
			 */
			(void) tpi_optcom_req(_WR(UDP_RD(q)),
			    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "optmgmtreq");
			return;

		case T_DISCON_REQ:
			udp_disconnect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "disconreq");
			return;

		/* The following TPI message is not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "connres/disconreq");
			return;

		/* The following 3 TPI messages are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "data/exdata/ordrel");
			return;
		default:
			/* Unknown primitive: pass it to ip_output() below. */
			break;
		}
		break;
	case M_FLUSH:
		/* Flush our write queue, then pass the M_FLUSH along below. */
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)",
				    q, "getpeername");
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME: {
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "getmyname");
			return;
			}
		case ND_SET:
			/* nd_getset performs the necessary checking */
		case ND_GET:
			/*
			 * A non-zero return from nd_getset() means the
			 * request was handled here, so the reply goes back
			 * upstream; otherwise the ioctl is passed to
			 * ip_output() below.
			 */
			if (nd_getset(q, us->us_nd, mp)) {
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)",
				    q, "get");
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we could
			 * no longer use read-side synchronous stream.
			 * Drain any queued data and disable direct sockfs
			 * interface from now on.
			 */
			if (!udp->udp_issocket) {
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp->udp_issocket = B_FALSE;
				if (udp->udp_direct_sockfs) {
					/*
					 * Disable read-side synchronous
					 * stream interface and drain any
					 * queued data.
					 */
					udp_rcv_drain(UDP_RD(q), udp,
					    B_FALSE);
					ASSERT(!udp->udp_direct_sockfs);
					UDP_STAT(us, udp_sock_fallback);
				}
				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			putnext(UDP_RD(q), mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
	    "udp_wput_other_end: q %p (%S)", q, "end");
	ip_output(connp, mp, q, IP_WPUT);
}

/*
 * Callback-style wrapper around udp_wput_other(): "arg" is the conn_t,
 * whose write queue is used for the call, and udp_exit() is invoked on
 * the conn once the message has been processed.  "arg2" is unused.
 */
/* ARGSUSED */
static void
udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_wput_other(((conn_t *)arg)->conn_wq, mp);
	udp_exit((conn_t *)arg);
}

/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages.
 *
 * Only the TI_GETMYNAME and TI_GETPEERNAME continuations are processed
 * here; any other M_IOCDATA is handed to ip_output().  The function is
 * entered once per mi_copy stage: after the strbuf copyin it builds the
 * sockaddr and starts its copyout, after that copyout it copies out the
 * updated strbuf, and after the second copyout it acknowledges the ioctl.
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t	*mp1;
	STRUCT_HANDLE(strbuf, sb);
	uint16_t port;
	in6_addr_t	v6addr;
	ipaddr_t	v4addr;
	uint32_t	flowinfo = 0;
	int		addrlen;
	udp_t		*udp = Q_TO_UDP(q);

	/* Make sure it is one of ours. */
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		ip_output(udp->udp_connp, mp, q, IP_WPUT);
		return;
	}

	/* From here on work on the write side of the upper queue pair. */
	q = WR(UDP_RD(q));
	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME.  Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
	    (void *)mp1->b_rptr);
	if (udp->udp_family == AF_INET)
		addrlen = sizeof (sin_t);
	else
		addrlen = sizeof (sin6_t);

	/* The caller's buffer must be able to hold the whole sockaddr. */
	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v4addr = V4_PART_OF_V6(udp->udp_v6src);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				v4addr = V4_PART_OF_V6(udp->udp_bound_v6src);
			}
		} else {
			/* udp->udp_family == AF_INET6 */
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v6addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				v6addr = udp->udp_bound_v6src;
			}
		}
		port = udp->udp_port;
		break;
	case TI_GETPEERNAME:
		if (udp->udp_state != TS_DATA_XFER) {
			mi_copy_done(q, mp, ENOTCONN);
			return;
		}
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			v4addr = V4_PART_OF_V6(udp->udp_v6dst);
		} else {
			/* udp->udp_family == AF_INET6) */
			v6addr = udp->udp_v6dst;
			flowinfo = udp->udp_flowinfo;
		}
		port = udp->udp_dstport;
		break;
	default:
		mi_copy_done(q, mp, EPROTO);
		return;
	}
	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
	if (!mp1)
		return;

	/* Fill in the sockaddr and record its length in the strbuf. */
	if (udp->udp_family == AF_INET) {
		sin_t *sin;

		STRUCT_FSET(sb, len, (int)sizeof (sin_t));
		sin = (sin_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin[1];
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = v4addr;
		sin->sin_port = port;
	} else {
		/* udp->udp_family == AF_INET6 */
		sin6_t *sin6;

		STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
		sin6 = (sin6_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin6[1];
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_flowinfo = flowinfo;
		sin6->sin6_addr = v6addr;
		sin6->sin6_port = port;
	}
	/* Copy out the address */
	mi_copyout(q, mp);
}


/*
 * Run the options carried in a T_unitdata_req through tpi_optcom_buf().
 *
 * The result of tpi_optcom_buf() is stored in *errorp.  Returns 0 when
 * that result is zero and -1 otherwise.  "udpattrs" is passed through to
 * tpi_optcom_buf() unchanged.
 */
static int
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
    udpattrs_t *udpattrs)
{
	struct T_unitdata_req *udreqp;
	int is_absreq_failure;
	cred_t *cr;
	conn_t	*connp = Q_TO_CONN(q);

	ASSERT(((t_primp_t)mp->b_rptr)->type);

	cr = DB_CREDDEF(mp, connp->conn_cred);

	udreqp = (struct T_unitdata_req *)mp->b_rptr;

	/*
	 * Use upper queue for option processing since the callback
	 * routines expect to be called in UDP instance instead of IP.
	 */
	*errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length,
	    udreqp->OPT_offset, cr, &udp_opt_obj,
	    udpattrs, &is_absreq_failure);

	if (*errorp != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		return (-1);		/* failure */
	}
	ASSERT(is_absreq_failure == 0);
	return (0);	/* success */
}

/*
 * Module-wide initialization: record the UDP6 major number, compute the
 * maximum option size from the option table, create the udp_t kmem cache
 * and register the per-netstack init/fini callbacks.
 */
void
udp_ddi_init(void)
{
	UDP6_MAJ = ddi_name_to_major(UDP6);
	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t),
	    CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of udp_stack_t's.
	 */
	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
}

/*
 * Module-wide teardown: unregister the netstack callbacks first, then
 * destroy the udp_t kmem cache.
 */
void
udp_ddi_destroy(void)
{
	netstack_unregister(NS_UDP);

	kmem_cache_destroy(udp_cache);
}

/*
 * Initialize the UDP stack instance.
8314 */ 8315 static void * 8316 udp_stack_init(netstackid_t stackid, netstack_t *ns) 8317 { 8318 udp_stack_t *us; 8319 udpparam_t *pa; 8320 int i; 8321 8322 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 8323 us->us_netstack = ns; 8324 8325 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 8326 us->us_epriv_ports[0] = 2049; 8327 us->us_epriv_ports[1] = 4045; 8328 8329 /* 8330 * The smallest anonymous port in the priviledged port range which UDP 8331 * looks for free port. Use in the option UDP_ANONPRIVBIND. 8332 */ 8333 us->us_min_anonpriv_port = 512; 8334 8335 us->us_bind_fanout_size = udp_bind_fanout_size; 8336 8337 /* Roundup variable that might have been modified in /etc/system */ 8338 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 8339 /* Not a power of two. Round up to nearest power of two */ 8340 for (i = 0; i < 31; i++) { 8341 if (us->us_bind_fanout_size < (1 << i)) 8342 break; 8343 } 8344 us->us_bind_fanout_size = 1 << i; 8345 } 8346 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 8347 sizeof (udp_fanout_t), KM_SLEEP); 8348 for (i = 0; i < us->us_bind_fanout_size; i++) { 8349 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 8350 NULL); 8351 } 8352 8353 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 8354 8355 us->us_param_arr = pa; 8356 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 8357 8358 (void) udp_param_register(&us->us_nd, 8359 us->us_param_arr, A_CNT(udp_param_arr)); 8360 8361 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 8362 us->us_mibkp = udp_kstat_init(stackid); 8363 return (us); 8364 } 8365 8366 /* 8367 * Free the UDP stack instance. 
8368 */ 8369 static void 8370 udp_stack_fini(netstackid_t stackid, void *arg) 8371 { 8372 udp_stack_t *us = (udp_stack_t *)arg; 8373 int i; 8374 8375 for (i = 0; i < us->us_bind_fanout_size; i++) { 8376 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 8377 } 8378 8379 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 8380 sizeof (udp_fanout_t)); 8381 8382 us->us_bind_fanout = NULL; 8383 8384 nd_free(&us->us_nd); 8385 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 8386 us->us_param_arr = NULL; 8387 8388 udp_kstat_fini(stackid, us->us_mibkp); 8389 us->us_mibkp = NULL; 8390 8391 udp_kstat2_fini(stackid, us->us_kstat); 8392 us->us_kstat = NULL; 8393 bzero(&us->us_statistics, sizeof (us->us_statistics)); 8394 kmem_free(us, sizeof (*us)); 8395 } 8396 8397 static void * 8398 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 8399 { 8400 kstat_t *ksp; 8401 8402 udp_stat_t template = { 8403 { "udp_ip_send", KSTAT_DATA_UINT64 }, 8404 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 8405 { "udp_ire_null", KSTAT_DATA_UINT64 }, 8406 { "udp_drain", KSTAT_DATA_UINT64 }, 8407 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 8408 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 8409 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 8410 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 8411 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 8412 { "udp_out_opt", KSTAT_DATA_UINT64 }, 8413 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 8414 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 8415 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 8416 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 8417 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 8418 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 8419 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 8420 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 8421 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 8422 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 8423 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 8424 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 8425 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 
}, 8426 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 8427 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 8428 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 8429 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 8430 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 8431 #ifdef DEBUG 8432 { "udp_data_conn", KSTAT_DATA_UINT64 }, 8433 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 8434 #endif 8435 }; 8436 8437 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 8438 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 8439 KSTAT_FLAG_VIRTUAL, stackid); 8440 8441 if (ksp == NULL) 8442 return (NULL); 8443 8444 bcopy(&template, us_statisticsp, sizeof (template)); 8445 ksp->ks_data = (void *)us_statisticsp; 8446 ksp->ks_private = (void *)(uintptr_t)stackid; 8447 8448 kstat_install(ksp); 8449 return (ksp); 8450 } 8451 8452 static void 8453 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 8454 { 8455 if (ksp != NULL) { 8456 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 8457 kstat_delete_netstack(ksp, stackid); 8458 } 8459 } 8460 8461 static void * 8462 udp_kstat_init(netstackid_t stackid) 8463 { 8464 kstat_t *ksp; 8465 8466 udp_named_kstat_t template = { 8467 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 8468 { "inErrors", KSTAT_DATA_UINT32, 0 }, 8469 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 8470 { "entrySize", KSTAT_DATA_INT32, 0 }, 8471 { "entry6Size", KSTAT_DATA_INT32, 0 }, 8472 { "outErrors", KSTAT_DATA_UINT32, 0 }, 8473 }; 8474 8475 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 8476 KSTAT_TYPE_NAMED, 8477 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 8478 8479 if (ksp == NULL || ksp->ks_data == NULL) 8480 return (NULL); 8481 8482 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 8483 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 8484 8485 bcopy(&template, ksp->ks_data, sizeof (template)); 8486 ksp->ks_update = udp_kstat_update; 8487 ksp->ks_private = (void *)(uintptr_t)stackid; 8488 8489 kstat_install(ksp); 8490 
return (ksp); 8491 } 8492 8493 static void 8494 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 8495 { 8496 if (ksp != NULL) { 8497 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 8498 kstat_delete_netstack(ksp, stackid); 8499 } 8500 } 8501 8502 static int 8503 udp_kstat_update(kstat_t *kp, int rw) 8504 { 8505 udp_named_kstat_t *udpkp; 8506 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 8507 netstack_t *ns; 8508 udp_stack_t *us; 8509 8510 if ((kp == NULL) || (kp->ks_data == NULL)) 8511 return (EIO); 8512 8513 if (rw == KSTAT_WRITE) 8514 return (EACCES); 8515 8516 ns = netstack_find_by_stackid(stackid); 8517 if (ns == NULL) 8518 return (-1); 8519 us = ns->netstack_udp; 8520 if (us == NULL) { 8521 netstack_rele(ns); 8522 return (-1); 8523 } 8524 udpkp = (udp_named_kstat_t *)kp->ks_data; 8525 8526 udpkp->inDatagrams.value.ui32 = us->us_udp_mib.udpHCInDatagrams; 8527 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 8528 udpkp->outDatagrams.value.ui32 = us->us_udp_mib.udpHCOutDatagrams; 8529 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 8530 netstack_rele(ns); 8531 return (0); 8532 } 8533 8534 /* ARGSUSED */ 8535 static void 8536 udp_rput(queue_t *q, mblk_t *mp) 8537 { 8538 /* 8539 * We get here whenever we do qreply() from IP, 8540 * i.e as part of handlings ioctls, etc. 8541 */ 8542 putnext(q, mp); 8543 } 8544 8545 /* 8546 * Read-side synchronous stream info entry point, called as a 8547 * result of handling certain STREAMS ioctl operations. 
8548 */ 8549 static int 8550 udp_rinfop(queue_t *q, infod_t *dp) 8551 { 8552 mblk_t *mp; 8553 uint_t cmd = dp->d_cmd; 8554 int res = 0; 8555 int error = 0; 8556 udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); 8557 struct stdata *stp = STREAM(q); 8558 8559 mutex_enter(&udp->udp_drain_lock); 8560 /* If shutdown on read has happened, return nothing */ 8561 mutex_enter(&stp->sd_lock); 8562 if (stp->sd_flag & STREOF) { 8563 mutex_exit(&stp->sd_lock); 8564 goto done; 8565 } 8566 mutex_exit(&stp->sd_lock); 8567 8568 if ((mp = udp->udp_rcv_list_head) == NULL) 8569 goto done; 8570 8571 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 8572 8573 if (cmd & INFOD_COUNT) { 8574 /* 8575 * Return the number of messages. 8576 */ 8577 dp->d_count += udp->udp_rcv_msgcnt; 8578 res |= INFOD_COUNT; 8579 } 8580 if (cmd & INFOD_BYTES) { 8581 /* 8582 * Return size of all data messages. 8583 */ 8584 dp->d_bytes += udp->udp_rcv_cnt; 8585 res |= INFOD_BYTES; 8586 } 8587 if (cmd & INFOD_FIRSTBYTES) { 8588 /* 8589 * Return size of first data message. 8590 */ 8591 dp->d_bytes = msgdsize(mp); 8592 res |= INFOD_FIRSTBYTES; 8593 dp->d_cmd &= ~INFOD_FIRSTBYTES; 8594 } 8595 if (cmd & INFOD_COPYOUT) { 8596 mblk_t *mp1 = mp->b_cont; 8597 int n; 8598 /* 8599 * Return data contents of first message. 8600 */ 8601 ASSERT(DB_TYPE(mp1) == M_DATA); 8602 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 8603 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 8604 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 8605 UIO_READ, dp->d_uiop)) != 0) { 8606 goto done; 8607 } 8608 mp1 = mp1->b_cont; 8609 } 8610 res |= INFOD_COPYOUT; 8611 dp->d_cmd &= ~INFOD_COPYOUT; 8612 } 8613 done: 8614 mutex_exit(&udp->udp_drain_lock); 8615 8616 dp->d_res |= res; 8617 8618 return (error); 8619 } 8620 8621 /* 8622 * Read-side synchronous stream entry point. This is called as a result 8623 * of recv/read operation done at sockfs, and is guaranteed to execute 8624 * outside of the interrupt thread context. 
It returns a single datagram
 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
 *
 * The dequeued message (or NULL when the list is empty) is handed back in
 * dp->d_mp.  Returns 0, or EBUSY when the direct sockfs interface is no
 * longer enabled for this udp.
 */
static int
udp_rrw(queue_t *q, struiod_t *dp)
{
	mblk_t	*mp;
	udp_t	*udp = Q_TO_UDP(_RD(UDP_WR(q)));
	udp_stack_t *us = udp->udp_us;

	/* We should never get here when we're in SNMP mode */
	ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD));

	/*
	 * Dequeue datagram from the head of the list and return
	 * it to caller; also ensure that RSLEEP sd_wakeq flag is
	 * set/cleared depending on whether or not there's data
	 * remaining in the list.
	 */
	mutex_enter(&udp->udp_drain_lock);
	if (!udp->udp_direct_sockfs) {
		mutex_exit(&udp->udp_drain_lock);
		UDP_STAT(us, udp_rrw_busy);
		return (EBUSY);
	}
	if ((mp = udp->udp_rcv_list_head) != NULL) {
		uint_t size = msgdsize(mp);

		/* Last datagram in the list? */
		if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
			udp->udp_rcv_list_tail = NULL;
		mp->b_next = NULL;

		/* Keep the byte and message counters in step with the list. */
		udp->udp_rcv_cnt -= size;
		udp->udp_rcv_msgcnt--;
		UDP_STAT(us, udp_rrw_msgcnt);

		/* No longer flow-controlling? */
		if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
		    udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
			udp->udp_drain_qfull = B_FALSE;
	}
	if (udp->udp_rcv_list_head == NULL) {
		/*
		 * Either we just dequeued the last datagram or
		 * we get here from sockfs and have nothing to
		 * return; in this case clear RSLEEP.
		 */
		ASSERT(udp->udp_rcv_cnt == 0);
		ASSERT(udp->udp_rcv_msgcnt == 0);
		ASSERT(udp->udp_rcv_list_tail == NULL);
		STR_WAKEUP_CLEAR(STREAM(q));
	} else {
		/*
		 * More data follows; we need udp_rrw() to be
		 * called in future to pick up the rest.
		 */
		STR_WAKEUP_SET(STREAM(q));
	}
	mutex_exit(&udp->udp_drain_lock);
	dp->d_mp = mp;
	return (0);
}

/*
 * Enqueue a completely-built T_UNITDATA_IND message into the receive
 * list; this is typically executed within the interrupt thread context
 * and so we do things as quickly as possible.
 *
 * "q" must be a read queue and "pkt_len" must equal msgdsize(mp); both
 * are only checked by ASSERTs.
 */
static void
udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
{
	ASSERT(q == RD(q));
	ASSERT(pkt_len == msgdsize(mp));
	ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
	ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
	ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));

	mutex_enter(&udp->udp_drain_lock);
	/*
	 * Wake up and signal the receiving app; it is okay to do this
	 * before enqueueing the mp because we are holding the drain lock.
	 * One of the advantages of synchronous stream is the ability for
	 * us to find out when the application performs a read on the
	 * socket by way of udp_rrw() entry point being called.  We need
	 * to generate SIGPOLL/SIGIO for each received data in the case
	 * of asynchronous socket just as in the strrput() case.  However,
	 * we only wake the application up when necessary, i.e. during the
	 * first enqueue.  When udp_rrw() is called, we send up a single
	 * datagram upstream and call STR_WAKEUP_SET() again when there
	 * are still data remaining in our receive queue.
	 */
	if (udp->udp_rcv_list_head == NULL) {
		STR_WAKEUP_SET(STREAM(q));
		udp->udp_rcv_list_head = mp;
	} else {
		udp->udp_rcv_list_tail->b_next = mp;
	}
	udp->udp_rcv_list_tail = mp;
	udp->udp_rcv_cnt += pkt_len;
	udp->udp_rcv_msgcnt++;

	/* Need to flow-control? */
	if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
	    udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
		udp->udp_drain_qfull = B_TRUE;

	/* Update poll events and send SIGPOLL/SIGIO if necessary */
	STR_SENDSIG(STREAM(q));
	mutex_exit(&udp->udp_drain_lock);
}

/*
 * Drain the contents of receive list to the module upstream; we do
 * this during close or when we fallback to the slow mode due to
 * sockmod being popped or a module being pushed on top of us.
 *
 * When "closing" is set the queued messages are freed instead of being
 * sent upstream.  On return udp_direct_sockfs is B_FALSE and the receive
 * list and its counters are empty.
 */
static void
udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
{
	mblk_t *mp;
	udp_stack_t *us = udp->udp_us;

	ASSERT(q == RD(q));

	mutex_enter(&udp->udp_drain_lock);
	/*
	 * There is no race with a concurrent udp_input() sending
	 * up packets using putnext() after we have cleared the
	 * udp_direct_sockfs flag but before we have completed
	 * sending up the packets in udp_rcv_list, since we are
	 * either a writer or we have quiesced the conn.
	 */
	udp->udp_direct_sockfs = B_FALSE;
	mutex_exit(&udp->udp_drain_lock);

	/* Count this drain only if there is actually something queued. */
	if (udp->udp_rcv_list_head != NULL)
		UDP_STAT(us, udp_drain);

	/*
	 * Send up everything via putnext(); note here that we
	 * don't need the udp_drain_lock to protect us since
	 * nothing can enter udp_rrw() and that we currently
	 * have exclusive access to this udp.
	 */
	while ((mp = udp->udp_rcv_list_head) != NULL) {
		udp->udp_rcv_list_head = mp->b_next;
		mp->b_next = NULL;
		udp->udp_rcv_cnt -= msgdsize(mp);
		udp->udp_rcv_msgcnt--;
		if (closing) {
			freemsg(mp);
		} else {
			putnext(q, mp);
		}
	}
	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	udp->udp_rcv_list_tail = NULL;
	udp->udp_drain_qfull = B_FALSE;
}

/*
 * Set the receive high-water mark of "udp" from the requested "size".
 * The stored value is the request plus half again, capped at the stack's
 * us_max_buf.  Returns the value actually stored.
 */
static size_t
udp_set_rcv_hiwat(udp_t *udp, size_t size)
{
	udp_stack_t *us = udp->udp_us;

	/* We add a bit of extra buffering */
	size += size >> 1;
	if (size > us->us_max_buf)
		size = us->us_max_buf;

	udp->udp_rcv_hiwat = size;
	return (size);
}

/*
 * Little helper for IPsec's NAT-T processing.
 * Returns the us_do_checksum setting of the UDP instance of "ns".
 */
boolean_t
udp_compute_checksum(netstack_t *ns)
{
	udp_stack_t *us = ns->netstack_udp;

	return (us->us_do_checksum);
}