1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/sdt.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 67 #include <inet/common.h> 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_if.h> 73 #include <inet/ip_multi.h> 74 #include <inet/ip_ndp.h> 75 #include <inet/mi.h> 76 #include <inet/mib2.h> 77 #include <inet/nd.h> 78 #include <inet/optcom.h> 79 #include <inet/snmpcom.h> 80 #include <inet/kstatcom.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipclassifier.h> 83 #include <inet/ipsec_impl.h> 84 #include <inet/ipp_common.h> 85 86 /* 87 * The ipsec_info.h header file is here since it has the definition for the 88 * M_CTL message types used by IP to convey information to the ULP. The 89 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
90 */ 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 94 #include <sys/tsol/label.h> 95 #include <sys/tsol/tnet.h> 96 #include <rpc/pmap_prot.h> 97 98 /* 99 * Synchronization notes: 100 * 101 * UDP uses a combination of its internal perimeter, a global lock and 102 * a set of bind hash locks to protect its data structures. Please see 103 * the note above udp_mode_assertions for details about the internal 104 * perimeter. 105 * 106 * When a UDP endpoint is bound to a local port, it is inserted into 107 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 108 * The size of the array is controlled by the udp_bind_fanout_size variable. 109 * This variable can be changed in /etc/system if the default value is 110 * not large enough. Each bind hash bucket is protected by a per bucket 111 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 112 * structure. An UDP endpoint is removed from the bind hash list only 113 * when it is being unbound or being closed. The per bucket lock also 114 * protects a UDP endpoint's state changes. 115 * 116 * Plumbing notes: 117 * 118 * Both udp and ip are merged, but the streams plumbing is kept unchanged 119 * in that udp is always pushed atop /dev/ip. This is done to preserve 120 * backwards compatibility for certain applications which rely on such 121 * plumbing geometry to do things such as issuing I_POP on the stream 122 * in order to obtain direct access to /dev/ip, etc. 123 * 124 * All UDP processings happen in the /dev/ip instance; the udp module 125 * instance does not possess any state about the endpoint, and merely 126 * acts as a dummy module whose presence is to keep the streams plumbing 127 * appearance unchanged. At open time /dev/ip allocates a conn_t that 128 * happens to embed a udp_t. This stays dormant until the time udp is 129 * pushed, which indicates to /dev/ip that it must convert itself from 130 * an IP to a UDP endpoint. 
131 * 132 * We only allow for the following plumbing cases: 133 * 134 * Normal: 135 * /dev/ip is first opened and later udp is pushed directly on top. 136 * This is the default action that happens when a udp socket or 137 * /dev/udp is opened. The conn_t created by /dev/ip instance is 138 * now shared and is marked with IPCL_UDP. 139 * 140 * SNMP-only: 141 * udp is pushed on top of a module other than /dev/ip. When this 142 * happens it will support only SNMP semantics. A new conn_t is 143 * allocated and marked with IPCL_UDPMOD. 144 * 145 * The above cases imply that we don't support any intermediate module to 146 * reside in between /dev/ip and udp -- in fact, we never supported such 147 * scenario in the past as the inter-layer communication semantics have 148 * always been private. Also note that the normal case allows for SNMP 149 * requests to be processed in addition to the rest of UDP operations. 150 * 151 * The normal case plumbing is depicted by the following diagram: 152 * 153 * +---------------+---------------+ 154 * | | | udp 155 * | udp_wq | udp_rq | 156 * | | UDP_RD | 157 * | | | 158 * +---------------+---------------+ 159 * | ^ 160 * v | 161 * +---------------+---------------+ 162 * | | | /dev/ip 163 * | ip_wq | ip_rq | conn_t 164 * | UDP_WR | | 165 * | | | 166 * +---------------+---------------+ 167 * 168 * Messages arriving at udp_wq from above will end up in ip_wq before 169 * it gets processed, i.e. udp write entry points will advance udp_wq 170 * and use its q_next value as ip_wq in order to use the conn_t that 171 * is stored in its q_ptr. Likewise, messages generated by ip to the 172 * module above udp will appear as if they are originated from udp_rq, 173 * i.e. putnext() calls to the module above udp is done using the 174 * udp_rq instead of ip_rq in order to avoid udp_rput() which does 175 * nothing more than calling putnext(). 176 * 177 * The above implies the following rule of thumb: 178 * 179 * 1. 
udp_t is obtained from conn_t, which is created by the /dev/ip
 *    instance and is stored in q_ptr of both ip_wq and ip_rq.  There
 *    is no direct reference to conn_t from either udp_wq or udp_rq.
 *
 * 2. Write-side entry points of udp can obtain the conn_t via the
 *    Q_TO_CONN() macro, using the queue value obtained from UDP_WR().
 *
 * 3. While in /dev/ip context, putnext() to the module above udp can
 *    be done by supplying the queue value obtained from UDP_RD().
 *
 */

/* Queue-navigation helpers; defined later in this file. */
static queue_t *UDP_WR(queue_t *);
static queue_t *UDP_RD(queue_t *);

struct kmem_cache *udp_cache;

/* For /etc/system control */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;

/* Canned replies written into ndd "get" responses by the report routines. */
#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"

/* Option processing attrs */
typedef struct udpattrs_s {
	union {
		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
		ip4_pkt_t	*udpattr_ipp4;	/* For V4 */
	} udpattr_ippu;
/* Shorthand so callers can name the union members directly. */
#define	udpattr_ipp6 udpattr_ippu.udpattr_ipp6
#define	udpattr_ipp4 udpattr_ippu.udpattr_ipp4
	mblk_t		*udpattr_mb;
	boolean_t	udpattr_credset;
} udpattrs_t;

/* Forward declarations of the file-local (static) routines. */
static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static int	udp_build_hdrs(queue_t *q, udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_close(queue_t *q);
static void	udp_connect(queue_t *q, mblk_t *mp);
static void	udp_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(queue_t *q, mblk_t *mp);
static void	udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
		    t_scalar_t addr_length);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
		    cred_t *cr);
static void	udp_report_item(mblk_t *mp, udp_t *udp);
static void	udp_rput(queue_t *q, mblk_t *mp);
static void	udp_rput_other(queue_t *, mblk_t *);
static int	udp_rinfop(queue_t *q, infod_t *dp);
static int	udp_rrw(queue_t *q, struiod_t *dp);
static	void	udp_rput_bind_ack(queue_t *q, mblk_t *mp);
static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	udp_send_data(udp_t *, queue_t *, mblk_t *, ipha_t *);
static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
		    t_scalar_t destlen, t_scalar_t err);
static void	udp_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
    boolean_t random);
static void	udp_wput(queue_t *q, mblk_t *mp);
static mblk_t	*udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
		    int *, boolean_t);
static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
		    int *error);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
		    socklen_t addrlen);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	udp_stack_fini(netstackid_t stackid, void *arg);

static void	*udp_kstat_init(netstackid_t stackid);
static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
static void	udp_kstat2_fini(netstackid_t, kstat_t *);
static int	udp_kstat_update(kstat_t *kp, int rw);
static void	udp_input_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2);

static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
		    uint_t pkt_len);
static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void	udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t);
static void	udp_exit(conn_t *);
static void	udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t);
#ifdef DEBUG
static void	udp_mode_assertions(udp_t *, int);
#endif /* DEBUG */

major_t UDP6_MAJ;
#define	UDP6 "udp6"

/* Default high/low water marks; also the defaults in udp_param_arr below. */
#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024

/* Module info shared by every qinit structure in this file. */
static struct module_info udp_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/* Read-side and write-side qinit for the normal udp module instance. */
static struct qinit udp_rinit = {
	(pfi_t)udp_rput, NULL, udp_open, udp_close, NULL,
	&udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_winit = {
	(pfi_t)udp_wput, NULL, NULL, NULL, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Support for just SNMP if UDP is not pushed directly over device IP */
struct qinit udp_snmp_rinit = {
	(pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

struct qinit udp_snmp_winit = {
	(pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

struct streamtab udpinfo = {
	&udp_rinit, &udp_winit
};

static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

/* IP_MAXPACKET less the UDP header and a simple (option-free) IPv4 header. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* IP_MAXPACKET less the UDP header and the fixed IPv6 header. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of udp_g_t_info_ack_ipv4. */
static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /*min		max		value		name */
 { 0L,		256,		32,		"udp_wroff_extra" },
 { 1L,		255,		255,		"udp_ipv4_ttl" },
 { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
 { 0,		1,		1,		"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
 { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
 { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
 { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
402 * On non-clustered systems these vectors must always be NULL 403 */ 404 405 void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family, 406 uint8_t *laddrp, in_port_t lport) = NULL; 407 void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, 408 uint8_t *laddrp, in_port_t lport) = NULL; 409 410 typedef union T_primitives *t_primp_t; 411 412 #define UDP_ENQUEUE_MP(udp, mp, proc, tag) { \ 413 ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL); \ 414 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 415 (mp)->b_queue = (queue_t *)((uintptr_t)tag); \ 416 (mp)->b_prev = (mblk_t *)proc; \ 417 if ((udp)->udp_mphead == NULL) \ 418 (udp)->udp_mphead = (mp); \ 419 else \ 420 (udp)->udp_mptail->b_next = (mp); \ 421 (udp)->udp_mptail = (mp); \ 422 (udp)->udp_mpcount++; \ 423 } 424 425 #define UDP_READERS_INCREF(udp) { \ 426 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 427 (udp)->udp_reader_count++; \ 428 } 429 430 #define UDP_READERS_DECREF(udp) { \ 431 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 432 (udp)->udp_reader_count--; \ 433 if ((udp)->udp_reader_count == 0) \ 434 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 435 } 436 437 #define UDP_SQUEUE_DECREF(udp) { \ 438 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 439 (udp)->udp_squeue_count--; \ 440 if ((udp)->udp_squeue_count == 0) \ 441 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 442 } 443 444 /* 445 * Notes on UDP endpoint synchronization: 446 * 447 * UDP needs exclusive operation on a per endpoint basis, when executing 448 * functions that modify the endpoint state. udp_rput_other() deals with 449 * packets with IP options, and processing these packets end up having 450 * to update the endpoint's option related state. udp_wput_other() deals 451 * with control operations from the top, e.g. connect() that needs to 452 * update the endpoint state. These could be synchronized using locks, 453 * but the current version uses squeues for this purpose. 
squeues may 454 * give performance improvement for certain cases such as connected UDP 455 * sockets; thus the framework allows for using squeues. 456 * 457 * The perimeter routines are described as follows: 458 * 459 * udp_enter(): 460 * Enter the UDP endpoint perimeter. 461 * 462 * udp_become_writer(): 463 * Become exclusive on the UDP endpoint. Specifies a function 464 * that will be called exclusively either immediately or later 465 * when the perimeter is available exclusively. 466 * 467 * udp_exit(): 468 * Exit the UDP perimeter. 469 * 470 * Entering UDP from the top or from the bottom must be done using 471 * udp_enter(). No lock must be held while attempting to enter the UDP 472 * perimeter. When finished, udp_exit() must be called to get out of 473 * the perimeter. 474 * 475 * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode, 476 * multiple threads may enter a UDP endpoint concurrently. This is used 477 * for sending and/or receiving normal data. Control operations and other 478 * special cases call udp_become_writer() to become exclusive on a per 479 * endpoint basis and this results in transitioning to SQUEUE mode. squeue 480 * by definition serializes access to the conn_t. When there are no more 481 * pending messages on the squeue for the UDP connection, the endpoint 482 * reverts to MT_HOT mode. During the interregnum when not all MT threads 483 * of an endpoint have finished, messages are queued in the UDP endpoint 484 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode. 485 * 486 * These modes have the following analogs: 487 * 488 * UDP_MT_HOT/udp_reader_count==0 none 489 * UDP_MT_HOT/udp_reader_count>0 RW_READ_LOCK 490 * UDP_MT_QUEUED RW_WRITE_WANTED 491 * UDP_SQUEUE or UDP_QUEUED_SQUEUE RW_WRITE_LOCKED 492 * 493 * Stable modes: UDP_MT_HOT, UDP_SQUEUE 494 * Transient modes: UDP_MT_QUEUED, UDP_QUEUED_SQUEUE 495 * 496 * While in stable modes, UDP keeps track of the number of threads 497 * operating on the endpoint. 
The udp_reader_count variable represents
 * the number of threads entering the endpoint as readers while it is
 * in UDP_MT_HOT mode.  Transitioning to UDP_SQUEUE happens when there
 * is only a single reader, i.e. when this counter drops to 1.  Likewise,
 * udp_squeue_count represents the number of threads operating on the
 * endpoint's squeue while it is in UDP_SQUEUE mode.  The mode transition
 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e.
 * when this counter drops to 0.
 *
 * The default mode is set to UDP_MT_HOT and UDP alternates between
 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below.
 *
 * Mode transition:
 * ----------------------------------------------------------------
 * old mode		Event				New mode
 * ----------------------------------------------------------------
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_SQUEUE
 *			and udp_reader_count == 1
 *
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_MT_QUEUED
 *			and udp_reader_count > 1
 *
 * UDP_MT_QUEUED	udp_reader_count drops to zero	UDP_QUEUED_SQUEUE
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_SQUEUE
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count != 0
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_MT_HOT
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count
 *			drops to zero
 *
 * UDP_SQUEUE		udp_squeue_count drops to zero	UDP_MT_HOT
 * ----------------------------------------------------------------
 */

/*
 * Given a queue of the udp module instance, return the queue that follows
 * its write side.  The assertions require that neither udp queue carries a
 * q_ptr and that the returned queue's q_ptr is a conn_t marked IPCL_UDP.
 */
static queue_t *
UDP_WR(queue_t *q)
{
	ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL);
	ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next)));

	return (_WR(q)->q_next);
}

/*
 * Given a queue whose q_ptr (on both sides) holds an IPCL_UDP conn_t,
 * return the queue that follows its read side.  The assertions require
 * that the returned queue has no q_ptr of its own.
 */
static queue_t *
UDP_RD(queue_t *q)
{
	ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(q)));
	ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL);

	return (_RD(q)->q_next);
}

/* The mode checks compile away entirely on non-DEBUG kernels. */
#ifdef DEBUG
#define	UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller)
#else
#define	UDP_MODE_ASSERTIONS(udp, caller)
#endif

/* Invariants */
#ifdef DEBUG

/* One counter per udp_mode, bumped on every udp_mode_assertions() call. */
uint32_t udp_count[4];

/* Context of udp_mode_assertions */
#define	UDP_ENTER		1
#define	UDP_BECOME_WRITER	2
#define	UDP_EXIT		3

/*
 * Check the per-mode invariants of the endpoint.  `caller' is one of the
 * UDP_ENTER / UDP_BECOME_WRITER / UDP_EXIT context values above; some
 * checks are relaxed at entry.  conn_lock must be held.
 */
static void
udp_mode_assertions(udp_t *udp, int caller)
{
	ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock));

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		/*
		 * Messages have not yet been enqueued on the internal queue,
		 * otherwise we would have switched to UDP_MT_QUEUED. Likewise
		 * by definition, there can't be any messages enqueued on the
		 * squeue. The UDP could be quiescent, so udp_reader_count
		 * could be zero at entry.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 &&
		    udp->udp_squeue_count == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0);
		udp_count[0]++;
		break;

	case UDP_MT_QUEUED:
		/*
		 * The last MT thread to exit the udp perimeter empties the
		 * internal queue and then switches the UDP to
		 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED
		 * mode, it means there must be at least 1 MT thread still in
		 * the perimeter and at least 1 message on the internal queue.
		 */
		ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL &&
		    udp->udp_mpcount != 0 && udp->udp_squeue_count == 0);
		udp_count[1]++;
		break;

	case UDP_QUEUED_SQUEUE:
		/*
		 * The switch has happened from MT to SQUEUE. So there can't
		 * be any MT threads. Messages could still pile up on the
		 * internal queue until the transition is complete and we move
		 * to UDP_SQUEUE mode. We can't assert on nonzero
		 * udp_squeue_count since the squeue could drain any time.
		 */
		ASSERT(udp->udp_reader_count == 0);
		udp_count[2]++;
		break;

	case UDP_SQUEUE:
		/*
		 * The transition is complete. There can't be any messages on
		 * the internal queue. The udp could be quiescent or the squeue
		 * could drain any time, so we can't assert on nonzero
		 * udp_squeue_count during entry. Nor can we assert that
		 * udp_reader_count is zero, since, a reader thread could have
		 * directly become writer in line by calling udp_become_writer
		 * without going through the queued states.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0);
		udp_count[3]++;
		break;
	}
}
#endif

/*
 * Enter the UDP perimeter with message mp, to be handled by proc.
 * A message for a conn_t that has CONN_CLOSING set is freed.  Otherwise
 * the current udp_mode decides whether proc runs right away in the
 * calling thread (UDP_MT_HOT), is handed to the squeue (UDP_SQUEUE), or
 * is parked on the internal queue (the two queued modes).
 */
#define	_UDP_ENTER(connp, mp, proc, tag) {				\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	if ((connp)->conn_state_flags & CONN_CLOSING) {			\
		mutex_exit(&(connp)->conn_lock);			\
		freemsg(mp);						\
	} else {							\
		UDP_MODE_ASSERTIONS(_udp, UDP_ENTER);			\
									\
		switch (_udp->udp_mode) {				\
		case UDP_MT_HOT:					\
			/* We can execute as reader right away. */	\
			UDP_READERS_INCREF(_udp);			\
			mutex_exit(&(connp)->conn_lock);		\
			(*(proc))(connp, mp, (connp)->conn_sqp);	\
			break;						\
									\
		case UDP_SQUEUE:					\
			/*						\
			 * We are in squeue mode, send the		\
			 * packet to the squeue				\
			 */						\
			_udp->udp_squeue_count++;			\
			CONN_INC_REF_LOCKED(connp);			\
			mutex_exit(&(connp)->conn_lock);		\
			squeue_enter((connp)->conn_sqp, mp, proc,	\
			    connp, tag);				\
			break;						\
									\
		case UDP_MT_QUEUED:					\
		case UDP_QUEUED_SQUEUE:					\
			/*						\
			 * Some messages may have been enqueued		\
			 * ahead of us.  Enqueue the new message	\
			 * at the tail of the internal queue to		\
			 * preserve message ordering.			\
			 */						\
			UDP_ENQUEUE_MP(_udp, mp, proc, tag);		\
			mutex_exit(&(connp)->conn_lock);		\
			break;						\
		}							\
	}								\
}

/* Function form of _UDP_ENTER(). */
static void
udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	_UDP_ENTER(connp, mp, proc, tag);
}

/*
 * Arrange for proc to run exclusively on the endpoint with message mp.
 *
 * UDP_MT_HOT with a single reader: switch to UDP_SQUEUE and enter the
 * squeue right away.  UDP_MT_HOT with more readers, or UDP_MT_QUEUED:
 * mark the endpoint UDP_MT_QUEUED and park mp on the internal queue.
 * UDP_SQUEUE or UDP_QUEUED_SQUEUE: the caller is already exclusive, so
 * proc is invoked directly after bumping udp_squeue_count.
 */
static void
udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	udp_t	*udp;

	udp = connp->conn_udp;

	mutex_enter(&connp->conn_lock);

	UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER);

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		if (udp->udp_reader_count == 1) {
			/*
			 * We are the only MT thread. Switch to squeue mode
			 * immediately.
			 */
			udp->udp_mode = UDP_SQUEUE;
			udp->udp_squeue_count = 1;
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			squeue_enter(connp->conn_sqp, mp, proc, connp, tag);
			return;
		}
		/* FALLTHRU */

	case UDP_MT_QUEUED:
		/* Enqueue the packet internally in UDP */
		udp->udp_mode = UDP_MT_QUEUED;
		UDP_ENQUEUE_MP(udp, mp, proc, tag);
		mutex_exit(&connp->conn_lock);
		return;

	case UDP_SQUEUE:
	case UDP_QUEUED_SQUEUE:
		/*
		 * We are already exclusive. i.e. we are already
		 * writer. Simply call the desired function.
		 */
		udp->udp_squeue_count++;
		mutex_exit(&connp->conn_lock);
		(*proc)(connp, mp, connp->conn_sqp);
		return;
	}
}

/*
 * Transition from MT mode to SQUEUE mode, when the last MT thread
 * is exiting the UDP perimeter. Move all messages from the internal
 * udp queue to the squeue.
A better way would be to move all the 735 * messages in one shot, this needs more support from the squeue framework 736 */ 737 static void 738 udp_switch_to_squeue(udp_t *udp) 739 { 740 mblk_t *mp; 741 mblk_t *mp_next; 742 sqproc_t proc; 743 uint8_t tag; 744 conn_t *connp = udp->udp_connp; 745 746 ASSERT(MUTEX_HELD(&connp->conn_lock)); 747 ASSERT(udp->udp_mode == UDP_MT_QUEUED); 748 while (udp->udp_mphead != NULL) { 749 mp = udp->udp_mphead; 750 udp->udp_mphead = NULL; 751 udp->udp_mptail = NULL; 752 udp->udp_mpcount = 0; 753 udp->udp_mode = UDP_QUEUED_SQUEUE; 754 mutex_exit(&connp->conn_lock); 755 /* 756 * It is best not to hold any locks across the calls 757 * to squeue functions. Since we drop the lock we 758 * need to go back and check the udp_mphead once again 759 * after the squeue_fill and hence the while loop at 760 * the top of this function 761 */ 762 for (; mp != NULL; mp = mp_next) { 763 mp_next = mp->b_next; 764 proc = (sqproc_t)mp->b_prev; 765 tag = (uint8_t)((uintptr_t)mp->b_queue); 766 mp->b_next = NULL; 767 mp->b_prev = NULL; 768 mp->b_queue = NULL; 769 CONN_INC_REF(connp); 770 udp->udp_squeue_count++; 771 squeue_fill(connp->conn_sqp, mp, proc, connp, 772 tag); 773 } 774 mutex_enter(&connp->conn_lock); 775 } 776 /* 777 * udp_squeue_count of zero implies that the squeue has drained 778 * even before we arrived here (i.e. after the squeue_fill above) 779 */ 780 udp->udp_mode = (udp->udp_squeue_count != 0) ? 
781 UDP_SQUEUE : UDP_MT_HOT; 782 } 783 784 #define _UDP_EXIT(connp) { \ 785 udp_t *_udp = (connp)->conn_udp; \ 786 \ 787 mutex_enter(&(connp)->conn_lock); \ 788 UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \ 789 \ 790 switch (_udp->udp_mode) { \ 791 case UDP_MT_HOT: \ 792 UDP_READERS_DECREF(_udp); \ 793 mutex_exit(&(connp)->conn_lock); \ 794 break; \ 795 \ 796 case UDP_SQUEUE: \ 797 UDP_SQUEUE_DECREF(_udp); \ 798 if (_udp->udp_squeue_count == 0) \ 799 _udp->udp_mode = UDP_MT_HOT; \ 800 mutex_exit(&(connp)->conn_lock); \ 801 break; \ 802 \ 803 case UDP_MT_QUEUED: \ 804 /* \ 805 * If this is the last MT thread, we need to \ 806 * switch to squeue mode \ 807 */ \ 808 UDP_READERS_DECREF(_udp); \ 809 if (_udp->udp_reader_count == 0) \ 810 udp_switch_to_squeue(_udp); \ 811 mutex_exit(&(connp)->conn_lock); \ 812 break; \ 813 \ 814 case UDP_QUEUED_SQUEUE: \ 815 UDP_SQUEUE_DECREF(_udp); \ 816 /* \ 817 * Even if the udp_squeue_count drops to zero, we \ 818 * don't want to change udp_mode to UDP_MT_HOT here. \ 819 * The thread in udp_switch_to_squeue will take care \ 820 * of the transition to UDP_MT_HOT, after emptying \ 821 * any more new messages that have been enqueued in \ 822 * udp_mphead. \ 823 */ \ 824 mutex_exit(&(connp)->conn_lock); \ 825 break; \ 826 } \ 827 } 828 829 static void 830 udp_exit(conn_t *connp) 831 { 832 _UDP_EXIT(connp); 833 } 834 835 /* 836 * Return the next anonymous port in the privileged port range for 837 * bind checking. 838 * 839 * Trusted Extension (TX) notes: TX allows administrator to mark or 840 * reserve ports as Multilevel ports (MLP). MLP has special function 841 * on TX systems. Once a port is made MLP, it's not available as 842 * ordinary port. This creates "holes" in the port name space. It 843 * may be necessary to skip the "holes" find a suitable anon port. 
844 */ 845 static in_port_t 846 udp_get_next_priv_port(udp_t *udp) 847 { 848 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 849 in_port_t nextport; 850 boolean_t restart = B_FALSE; 851 udp_stack_t *us = udp->udp_us; 852 853 retry: 854 if (next_priv_port < us->us_min_anonpriv_port || 855 next_priv_port >= IPPORT_RESERVED) { 856 next_priv_port = IPPORT_RESERVED - 1; 857 if (restart) 858 return (0); 859 restart = B_TRUE; 860 } 861 862 if (is_system_labeled() && 863 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 864 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 865 next_priv_port = nextport; 866 goto retry; 867 } 868 869 return (next_priv_port--); 870 } 871 872 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 873 /* ARGSUSED */ 874 static int 875 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 876 { 877 udp_fanout_t *udpf; 878 int i; 879 zoneid_t zoneid; 880 conn_t *connp; 881 udp_t *udp; 882 udp_stack_t *us; 883 884 connp = Q_TO_CONN(q); 885 udp = connp->conn_udp; 886 us = udp->udp_us; 887 888 /* Refer to comments in udp_status_report(). */ 889 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 890 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 891 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 892 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 893 return (0); 894 } 895 } 896 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 897 /* The following may work even if we cannot get a large buf. */ 898 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 899 return (0); 900 } 901 902 (void) mi_mpprintf(mp, 903 "UDP " MI_COL_HDRPAD_STR 904 /* 12345678[89ABCDEF] */ 905 " zone lport src addr dest addr port state"); 906 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 907 908 zoneid = connp->conn_zoneid; 909 910 for (i = 0; i < us->us_bind_fanout_size; i++) { 911 udpf = &us->us_bind_fanout[i]; 912 mutex_enter(&udpf->uf_lock); 913 914 /* Print the hash index. 
*/ 915 udp = udpf->uf_udp; 916 if (zoneid != GLOBAL_ZONEID) { 917 /* skip to first entry in this zone; might be none */ 918 while (udp != NULL && 919 udp->udp_connp->conn_zoneid != zoneid) 920 udp = udp->udp_bind_hash; 921 } 922 if (udp != NULL) { 923 uint_t print_len, buf_len; 924 925 buf_len = mp->b_cont->b_datap->db_lim - 926 mp->b_cont->b_wptr; 927 print_len = snprintf((char *)mp->b_cont->b_wptr, 928 buf_len, "%d\n", i); 929 if (print_len < buf_len) { 930 mp->b_cont->b_wptr += print_len; 931 } else { 932 mp->b_cont->b_wptr += buf_len; 933 } 934 for (; udp != NULL; udp = udp->udp_bind_hash) { 935 if (zoneid == GLOBAL_ZONEID || 936 zoneid == udp->udp_connp->conn_zoneid) 937 udp_report_item(mp->b_cont, udp); 938 } 939 } 940 mutex_exit(&udpf->uf_lock); 941 } 942 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 943 return (0); 944 } 945 946 /* 947 * Hash list removal routine for udp_t structures. 948 */ 949 static void 950 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 951 { 952 udp_t *udpnext; 953 kmutex_t *lockp; 954 udp_stack_t *us = udp->udp_us; 955 956 if (udp->udp_ptpbhn == NULL) 957 return; 958 959 /* 960 * Extract the lock pointer in case there are concurrent 961 * hash_remove's for this instance. 
962 */ 963 ASSERT(udp->udp_port != 0); 964 if (!caller_holds_lock) { 965 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 966 us->us_bind_fanout_size)].uf_lock; 967 ASSERT(lockp != NULL); 968 mutex_enter(lockp); 969 } 970 if (udp->udp_ptpbhn != NULL) { 971 udpnext = udp->udp_bind_hash; 972 if (udpnext != NULL) { 973 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 974 udp->udp_bind_hash = NULL; 975 } 976 *udp->udp_ptpbhn = udpnext; 977 udp->udp_ptpbhn = NULL; 978 } 979 if (!caller_holds_lock) { 980 mutex_exit(lockp); 981 } 982 } 983 984 static void 985 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 986 { 987 udp_t **udpp; 988 udp_t *udpnext; 989 990 ASSERT(MUTEX_HELD(&uf->uf_lock)); 991 if (udp->udp_ptpbhn != NULL) { 992 udp_bind_hash_remove(udp, B_TRUE); 993 } 994 udpp = &uf->uf_udp; 995 udpnext = udpp[0]; 996 if (udpnext != NULL) { 997 /* 998 * If the new udp bound to the INADDR_ANY address 999 * and the first one in the list is not bound to 1000 * INADDR_ANY we skip all entries until we find the 1001 * first one bound to INADDR_ANY. 1002 * This makes sure that applications binding to a 1003 * specific address get preference over those binding to 1004 * INADDR_ANY. 1005 */ 1006 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 1007 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 1008 while ((udpnext = udpp[0]) != NULL && 1009 !V6_OR_V4_INADDR_ANY( 1010 udpnext->udp_bound_v6src)) { 1011 udpp = &(udpnext->udp_bind_hash); 1012 } 1013 if (udpnext != NULL) 1014 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1015 } else { 1016 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1017 } 1018 } 1019 udp->udp_bind_hash = udpnext; 1020 udp->udp_ptpbhn = udpp; 1021 udpp[0] = udp; 1022 } 1023 1024 /* 1025 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 1026 * passed to udp_wput. 1027 * It associates a port number and local address with the stream. 
/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the us->us_next_port_to_try.
 *
 * Outline:
 *   1. Validate the request length and TS_UNBND state, and grow the mblk.
 *   2. Decode the address (none, sin_t or sin6_t) and the requested port.
 *   3. Pick an anonymous port, or check privilege for a requested one.
 *   4. Search the bind hash for a conflicting endpoint, moving on to
 *      another port unless the request insists on the one it named.
 *   5. Record the binding, insert into the bind hash, set TS_IDLE.
 *   6. On labeled systems, apply the multilevel port (MLP) checks.
 *   7. Append IPPROTO_UDP (and an IRE request mblk for a non-wildcard
 *      address) and pass the message to ip_bind_v4()/ip_bind_v6().
 *
 * Every failure is reported upstream with udp_err_ack(), which takes over
 * the message.
 */
static void
udp_bind(queue_t *q, mblk_t *mp)
{
	sin_t		*sin;
	sin6_t		*sin6;
	mblk_t		*mp1;
	in_port_t	port;		/* Host byte order */
	in_port_t	requested_port;	/* Host byte order */
	struct T_bind_req *tbr;
	int		count;
	in6_addr_t	v6src;
	boolean_t	bind_to_req_port_only;
	int		loopmax;
	udp_fanout_t	*udpf;
	in_port_t	lport;		/* Network byte order */
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;
	boolean_t	is_inaddr_any;
	mlp_type_t	addrtype, mlptype;
	udp_stack_t	*us;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	us = udp->udp_us;
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	/* Only an unbound endpoint may be bound. */
	if (udp->udp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Reallocate the message to make sure we have enough room for an
	 * address and the protocol type.
	 */
	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
	if (!mp1) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	mp = mp1;
	tbr = (struct T_bind_req *)mp->b_rptr;
	/*
	 * Decode the address.  Each case that falls out of the switch has
	 * set the pointer (sin or sin6) that matches udp_family; the other
	 * one stays uninitialized and must not be used below.
	 */
	switch (tbr->ADDR_length) {
	case 0:			/* Request for a generic port */
		/* Synthesize a wildcard address right after the header. */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (udp->udp_family == AF_INET) {
			tbr->ADDR_length = sizeof (sin_t);
			sin = (sin_t *)&tbr[1];
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
		} else {
			ASSERT(udp->udp_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&tbr[1];
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
		}
		port = 0;
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin->sin_port);
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin6->sin6_port);
		break;

	default:		/* Invalid request */
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	requested_port = port;

	/*
	 * Only a T_BIND_REQ that names a port insists on exactly that port;
	 * O_T_BIND_REQ, or a zero port, lets us pick a different one.
	 */
	if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ)
		bind_to_req_port_only = B_FALSE;
	else			/* T_BIND_REQ and requested_port != 0 */
		bind_to_req_port_only = B_TRUE;

	if (requested_port == 0) {
		/*
		 * If the application passed in zero for the port number, it
		 * doesn't care which port number we bind to. Get one in the
		 * valid range.
		 */
		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			port = udp_update_next_port(udp,
			    us->us_next_port_to_try, B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < us->us_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			/* Also honor the extra privileged port list. */
			for (i = 0; i < us->us_num_epriv_ports; i++) {
				if (port == us->us_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);

			if (secpolicy_net_privaddr(cr, port) != 0) {
				udp_err_ack(q, mp, TACCES, 0);
				return;
			}
		}
	}

	/* A zero port here means no anonymous port could be picked. */
	if (port == 0) {
		udp_err_ack(q, mp, TNOADDR, 0);
		return;
	}

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it.
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		/* An AF_INET6 socket bound to a v4-mapped address runs IPv4. */
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream.  If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set the bind fails; otherwise we
	 * search for an unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different than in SunOS 4.X which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/*
		 * loopmax = (IPPORT_RESERVED-1) -
		 *    us->us_min_anonpriv_port + 1
		 */
		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
	} else {
		loopmax = us->us_largest_anon_port -
		    us->us_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
	zoneid = connp->conn_zoneid;

	/*
	 * Port search.  The two "break" exits below leave the loop with
	 * udpf->uf_lock still held for the bucket of the chosen port; the
	 * lock is only dropped after udp_bind_hash_insert() (or on the
	 * udp_build_hdrs() failure path).  The conflict path drops the lock
	 * before trying another port or failing.
	 */
	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;

		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
		    us->us_bind_fanout_size)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
			    IPCL_ZONE_MATCH(connp,
			    udp1->udp_connp->conn_zoneid)) &&
			    !udp->udp_mac_exempt && !udp1->udp_mac_exempt)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * is changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind ||
			    udp1->udp_mac_exempt || udp->udp_mac_exempt) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion) {

				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a
				 * particular port number, then ignore any
				 * bindings of the other protocol that may
				 * conflict. This allows the user to bind IPv6
				 * alone and get both v4 and v6, or bind
				 * both and get each separately. On subsequent
				 * times through the loop, we're checking a
				 * port that we chose (not the user) and thus
				 * we do not allow casual duplicate bindings.
				 */
				if (count == 0 && requested_port != 0)
					continue;
			}

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct different IP addresses
			 * (non-wildcard, also), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			/* udp1 conflicts with the requested binding. */
			break;
		}

		/*
		 * SO_REUSEADDR on an explicitly requested port overrides
		 * an ordinary conflict, but never an exclusive-bind one.
		 */
		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when requested port
			 * is bound (and only first of the for()
			 * loop iteration).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 *
			 */
			udp_err_ack(q, mp, TADDRBUSY, 0);
			return;
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update us->us_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    us->us_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp_err_ack(q, mp, TNOADDR, 0);
			return;
		}
	}

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address udp_rput
	 * will clear the source address when it receives the T_BIND_ACK.
	 */
	udp->udp_v6src = udp->udp_bound_v6src = v6src;
	udp->udp_port = lport;
	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
		us->us_next_port_to_try = port + 1;
	}

	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
	if (udp->udp_family == AF_INET) {
		sin->sin_port = udp->udp_port;
	} else {
		int error;

		sin6->sin6_port = udp->udp_port;
		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			/*
			 * NOTE(review): udp_v6src, udp_bound_v6src and
			 * udp_port assigned above are not restored on
			 * this return -- confirm that is intended.
			 */
			mutex_exit(&udpf->uf_lock);
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	/* Publish the binding while the bucket lock is still held. */
	udp->udp_state = TS_IDLE;
	udp_bind_hash_insert(udpf, udp);
	mutex_exit(&udpf->uf_lock);

	/*
	 * NOTE(review): from here on the endpoint is in the bind hash in
	 * state TS_IDLE.  The error returns below only send an error ack
	 * (and reset conn_anon_port/conn_mlp_type); they do not undo the
	 * hash insertion or the state change -- confirm cleanup happens
	 * elsewhere.
	 */

	if (cl_inet_bind) {
		/*
		 * Running in cluster mode - register bind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}

	}

	/*
	 * Labeled systems only: a port we picked ourselves counts as an
	 * anonymous port.  The MLP checks run for explicitly requested
	 * ports, and for anonymous ports when conn_anon_mlp is set.
	 */
	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
	if (is_system_labeled() && (!connp->conn_anon_port ||
	    connp->conn_anon_mlp)) {
		uint16_t mlpport;
		cred_t *cr = connp->conn_cred;
		zone_t *zone;

		zone = crgetzone(cr);
		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
		    mlptSingle;
		addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
		    &v6src, udp->udp_us->us_netstack->netstack_ip);
		if (addrtype == mlptSingle) {
			udp_err_ack(q, mp, TNOADDR, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}
		/* Anonymous binds are checked against PMAPPORT instead. */
		mlpport = connp->conn_anon_port ? PMAPPORT : port;
		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
		    addrtype);
		if (mlptype != mlptSingle &&
		    (connp->conn_mlp_type == mlptSingle ||
		    secpolicy_net_bindmlp(cr) != 0)) {
			if (udp->udp_debug) {
				(void) strlog(UDP_MOD_ID, 0, 1,
				    SL_ERROR|SL_TRACE,
				    "udp_bind: no priv for multilevel port %d",
				    mlpport);
			}
			udp_err_ack(q, mp, TACCES, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}

		/*
		 * If we're specifically binding a shared IP address and the
		 * port is MLP on shared addresses, then check to see if this
		 * zone actually owns the MLP.  Reject if not.
		 */
		if (mlptype == mlptShared && addrtype == mlptShared) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid_t mlpzone;

			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
			    htons(mlpport));
			if (connp->conn_zoneid != mlpzone) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: attempt to bind port "
					    "%d on shared addr in zone %d "
					    "(should be %d)",
					    mlpport, connp->conn_zoneid,
					    mlpzone);
				}
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		if (connp->conn_anon_port) {
			int error;

			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
			    port, B_TRUE);
			if (error != 0) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: cannot establish anon "
					    "MLP for port %d", port);
				}
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		connp->conn_mlp_type = mlptype;
	}

	/* Pass the protocol number in the message following the address. */
	*mp->b_wptr++ = IPPROTO_UDP;
	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
		/*
		 * Append a request for an IRE if udp_v6src not
		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
		 */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			udp_err_ack(q, mp, TSYSERR, ENOMEM);
			return;
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
	}
	if (udp->udp_family == AF_INET6)
		mp = ip_bind_v6(q, mp, connp, NULL);
	else
		mp = ip_bind_v4(q, mp, connp);

	/*
	 * A non-NULL return is processed right away.  Presumably NULL means
	 * IP deferred the bind and will finish it through udp_resume_bind();
	 * the reference taken here looks like the one dropped by
	 * CONN_OPER_PENDING_DONE() in udp_resume_bind_cb() -- TODO confirm.
	 */
	if (mp != NULL)
		udp_rput_other(_RD(q), mp);
	else
		CONN_INC_REF(connp);
}


/*
 * Entry point for resuming a bind: queue udp_resume_bind_cb() for this
 * endpoint through udp_enter(), tagged SQTAG_BIND_RETRY.
 */
void
udp_resume_bind(conn_t *connp, mblk_t *mp)
{
	udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY);
}

/*
 * This is called from ip_wput_nondata to resume a deferred UDP bind.
 * It hands the message to udp_rput_other() on the conn's read queue, marks
 * the pending operation done and then leaves the UDP perimeter that
 * udp_enter() entered.  arg is the conn_t; arg2 is unused.
 */
/* ARGSUSED */
static void
udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2)
{
	conn_t *connp = arg;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));

	udp_rput_other(connp->conn_rq, mp);

	CONN_OPER_PENDING_DONE(connp);
	udp_exit(connp);
}
/*
 * This routine handles each T_CONN_REQ message passed to udp.  It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_rput.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
 *
 * Failures detected here are answered with udp_err_ack() or
 * udp_err_ack_prim().  A failure after the endpoint was moved to
 * TS_DATA_XFER goes through the bind_failed label, which puts the state
 * back to TS_IDLE.
 */
static void
udp_connect(queue_t *q, mblk_t *mp)
{
	sin6_t	*sin6;
	sin_t	*sin;
	struct T_conn_req	*tcr;
	in6_addr_t v6dst;
	ipaddr_t v4dst;
	uint16_t dstport;
	uint32_t flowinfo;
	mblk_t	*mp1, *mp2;
	udp_fanout_t	*udpf;
	udp_t	*udp, *udp1;
	udp_stack_t	*us;

	udp = Q_TO_UDP(q);

	/* Only a cast; tcr is not dereferenced until the length is checked. */
	tcr = (struct T_conn_req *)mp->b_rptr;
	us = udp->udp_us;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	/*
	 * This UDP must have bound to a port already before doing
	 * a connect.
	 */
	if (udp->udp_state == TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);

	/* Bind hash bucket for our local port; its lock guards udp_state. */
	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
	    us->us_bind_fanout_size)];

	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		mutex_enter(&udpf->uf_lock);
		udp->udp_v6src = udp->udp_bound_v6src;
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
	}

	/* Options are not accepted on a connect request. */
	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine packet type based on type of address passed in
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that address family matches the type of
	 * family of the address passed down
	 */
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v6dst = sin6->sin6_addr;
		/*
		 * The destination decides the IP version used by an
		 * AF_INET6 socket: v4-mapped means IPv4 (v4dst is set,
		 * flowinfo is zero), anything else means IPv6 (v4dst is
		 * left unset and is not used below).
		 */
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			flowinfo = 0;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
			flowinfo = sin6->sin6_flowinfo;
		}
		dstport = sin6->sin6_port;
		break;
	}
	if (dstport == 0) {
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	/*
	 * Create a default IP header with no IP options.
	 *
	 * NOTE(review): udp_dstport, udp_v6dst and udp_flowinfo are written
	 * before the uniqueness check below and are not restored when that
	 * check fails -- confirm that is harmless.
	 */
	udp->udp_dstport = dstport;
	if (udp->udp_ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (udp->udp_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = 0;

		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * use the address of that interface as our
		 * source address if no source address has been set.
		 */
		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
		    CLASSD(v4dst) &&
		    udp->udp_multicast_if_addr != INADDR_ANY) {
			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
			    &udp->udp_v6src);
		}
	} else {
		ASSERT(udp->udp_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = flowinfo;
		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * then the ip bind logic will pick the correct source
		 * address (i.e. matching the outgoing multicast interface).
		 */
	}

	/*
	 * Verify that the src/port/dst/port and zoneid is unique for all
	 * connections in TS_DATA_XFER
	 */
	mutex_enter(&udpf->uf_lock);
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;
		/* Skip udp1 unless every component of the tuple matches. */
		if (udp->udp_port != udp1->udp_port ||
		    udp->udp_ipversion != udp1->udp_ipversion ||
		    dstport != udp1->udp_dstport ||
		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) ||
		    !(IPCL_ZONE_MATCH(udp->udp_connp,
		    udp1->udp_connp->conn_zoneid) ||
		    IPCL_ZONE_MATCH(udp1->udp_connp,
		    udp->udp_connp->conn_zoneid)))
			continue;
		/* An identical connected endpoint already exists. */
		mutex_exit(&udpf->uf_lock);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}
	/* Claim the tuple while still holding the bucket lock. */
	udp->udp_state = TS_DATA_XFER;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to verify that there is a route
	 * and to determine the source address.
	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		/*
		 * Shared failure tail: the later allocation failures jump
		 * here after sending their own error ack.
		 */
bind_failed:
		mutex_enter(&udpf->uf_lock);
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
		return;
	}

	/*
	 * We also have to send a connection confirmation to
	 * keep TLI happy. Prepare it for udp_rput.
	 */
	if (udp->udp_family == AF_INET)
		mp2 = mi_tpi_conn_con(NULL, (char *)sin,
		    sizeof (*sin), NULL, 0);
	else
		mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
		    sizeof (*sin6), NULL, 0);
	if (mp2 == NULL) {
		freemsg(mp1);
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack. */
		freemsg(mp2);
		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
	linkb(mp1, mp);
	linkb(mp1, mp2);

	mblk_setcred(mp1, udp->udp_connp->conn_cred);
	if (udp->udp_family == AF_INET)
		mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
	else
		mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);

	/*
	 * A non-NULL return is processed right away; on NULL a conn
	 * reference is taken instead (presumably IP completes the bind
	 * later -- TODO confirm).
	 */
	if (mp1 != NULL)
		udp_rput_other(_RD(q), mp1);
	else
		CONN_INC_REF(udp->udp_connp);
}

/*
 * STREAMS close routine for a UDP endpoint.  Quiesces the conn, drains
 * the receive list, turns off queue processing, restores the IP queue
 * water marks to their module defaults, frees the udp_t and finally turns
 * the conn_t back into a plain IP endpoint.  Always returns 0.
 */
static int
udp_close(queue_t *q)
{
	conn_t	*connp = Q_TO_CONN(UDP_WR(q));
	udp_t	*udp;
	queue_t	*ip_rq = RD(UDP_WR(q));

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	ip_quiesce_conn(connp);
	/*
	 * Disable read-side synchronous stream
	 * interface and drain any queued data.
	 */
	udp_rcv_drain(q, udp, B_TRUE);
	ASSERT(!udp->udp_direct_sockfs);

	qprocsoff(q);

	/* restore IP module's high and low water marks to default values */
	ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat;

	/* The drain above must have emptied the receive list. */
	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	ASSERT(udp->udp_rcv_list_tail == NULL);

	/* Frees the udp_t; udp must not be used past this point. */
	udp_close_free(connp);

	/*
	 * Restore connp as an IP endpoint.
	 * Locking required to prevent a race with udp_snmp_get()/
	 * ipcl_get_next_conn(), which selects conn_t which are
	 * IPCL_UDP and not CONN_CONDEMNED.
	 */
	mutex_enter(&connp->conn_lock);
	connp->conn_flags &= ~IPCL_UDP;
	connp->conn_state_flags &=
	    ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED);
	connp->conn_ulp_labeled = B_FALSE;
	mutex_exit(&connp->conn_lock);

	return (0);
}

/*
 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn.
 * Reports the unbind to the cluster hook when one is registered and the
 * endpoint is in TS_IDLE, removes the endpoint from the bind hash, and
 * then blocks on conn_cv until there are no readers, no squeue references
 * and the mode is UDP_MT_HOT.
 */
void
udp_quiesce_conn(conn_t *connp)
{
	udp_t	*udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)(&(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		}
	}

	/* Takes the bucket lock itself; a no-op if we were never hashed. */
	udp_bind_hash_remove(udp, B_FALSE);

	/* Wait for every thread inside the UDP perimeter to leave. */
	mutex_enter(&connp->conn_lock);
	while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 ||
	    udp->udp_mode != UDP_MT_HOT) {
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	}
	mutex_exit(&connp->conn_lock);
}

/*
 * Release everything hanging off the udp_t of connp (send and receive IP
 * options, sticky header buffer, sticky ip6 packet info), drop the hold
 * on the netstack, detach the udp_t from the conn_t and return it to
 * udp_cache.  On return connp->conn_udp is NULL.
 */
void
udp_close_free(conn_t *connp)
{
	udp_t *udp = connp->conn_udp;

	/* If there are any options associated with the stream, free them. */
	if (udp->udp_ip_snd_options) {
		mi_free((char *)udp->udp_ip_snd_options);
		udp->udp_ip_snd_options = NULL;
	}

	if (udp->udp_ip_rcv_options) {
		mi_free((char *)udp->udp_ip_rcv_options);
		udp->udp_ip_rcv_options = NULL;
	}

	/* Free memory associated with sticky options */
	if (udp->udp_sticky_hdrs_len != 0) {
		kmem_free(udp->udp_sticky_hdrs,
		    udp->udp_sticky_hdrs_len);
		udp->udp_sticky_hdrs = NULL;
		udp->udp_sticky_hdrs_len = 0;
	}

	ip6_pkt_free(&udp->udp_sticky_ipp);

	udp->udp_connp = NULL;
	/* udp_us is read here, before the udp_t goes back to the cache. */
	netstack_rele(udp->udp_us->us_netstack);

	connp->conn_udp = NULL;
	kmem_cache_free(udp_cache, udp);
}
2007 */ 2008 static void 2009 udp_disconnect(queue_t *q, mblk_t *mp) 2010 { 2011 udp_t *udp = Q_TO_UDP(q); 2012 mblk_t *mp1; 2013 udp_fanout_t *udpf; 2014 udp_stack_t *us; 2015 2016 us = udp->udp_us; 2017 if (udp->udp_state != TS_DATA_XFER) { 2018 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2019 "udp_disconnect: bad state, %u", udp->udp_state); 2020 udp_err_ack(q, mp, TOUTSTATE, 0); 2021 return; 2022 } 2023 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 2024 us->us_bind_fanout_size)]; 2025 mutex_enter(&udpf->uf_lock); 2026 udp->udp_v6src = udp->udp_bound_v6src; 2027 udp->udp_state = TS_IDLE; 2028 mutex_exit(&udpf->uf_lock); 2029 2030 /* 2031 * Send down bind to IP to remove the full binding and revert 2032 * to the local address binding. 2033 */ 2034 if (udp->udp_family == AF_INET) 2035 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 2036 else 2037 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 2038 if (mp1 == NULL) { 2039 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2040 return; 2041 } 2042 mp = mi_tpi_ok_ack_alloc(mp); 2043 if (mp == NULL) { 2044 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 2045 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 2046 return; 2047 } 2048 2049 if (udp->udp_family == AF_INET6) { 2050 int error; 2051 2052 /* Rebuild the header template */ 2053 error = udp_build_hdrs(q, udp); 2054 if (error != 0) { 2055 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 2056 freemsg(mp1); 2057 return; 2058 } 2059 } 2060 mutex_enter(&udpf->uf_lock); 2061 udp->udp_discon_pending = 1; 2062 mutex_exit(&udpf->uf_lock); 2063 2064 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 2065 linkb(mp1, mp); 2066 2067 if (udp->udp_family == AF_INET6) 2068 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 2069 else 2070 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 2071 2072 if (mp1 != NULL) 2073 udp_rput_other(_RD(q), mp1); 2074 else 2075 CONN_INC_REF(udp->udp_connp); 2076 } 2077 2078 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 2079 static void 2080 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 2081 { 2082 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 2083 putnext(UDP_RD(q), mp); 2084 } 2085 2086 /* Shorthand to generate and send TPI error acks to our client */ 2087 static void 2088 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2089 int sys_error) 2090 { 2091 struct T_error_ack *teackp; 2092 2093 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2094 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2095 teackp = (struct T_error_ack *)mp->b_rptr; 2096 teackp->ERROR_prim = primitive; 2097 teackp->TLI_error = t_error; 2098 teackp->UNIX_error = sys_error; 2099 putnext(UDP_RD(q), mp); 2100 } 2101 } 2102 2103 /*ARGSUSED*/ 2104 static int 2105 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2106 { 2107 int i; 2108 udp_t *udp = Q_TO_UDP(q); 2109 udp_stack_t *us = udp->udp_us; 2110 2111 for (i = 0; i < us->us_num_epriv_ports; i++) { 2112 if (us->us_epriv_ports[i] != 0) 2113 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 
2114 } 2115 return (0); 2116 } 2117 2118 /* ARGSUSED */ 2119 static int 2120 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2121 cred_t *cr) 2122 { 2123 long new_value; 2124 int i; 2125 udp_t *udp = Q_TO_UDP(q); 2126 udp_stack_t *us = udp->udp_us; 2127 2128 /* 2129 * Fail the request if the new value does not lie within the 2130 * port number limits. 2131 */ 2132 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2133 new_value <= 0 || new_value >= 65536) { 2134 return (EINVAL); 2135 } 2136 2137 /* Check if the value is already in the list */ 2138 for (i = 0; i < us->us_num_epriv_ports; i++) { 2139 if (new_value == us->us_epriv_ports[i]) { 2140 return (EEXIST); 2141 } 2142 } 2143 /* Find an empty slot */ 2144 for (i = 0; i < us->us_num_epriv_ports; i++) { 2145 if (us->us_epriv_ports[i] == 0) 2146 break; 2147 } 2148 if (i == us->us_num_epriv_ports) { 2149 return (EOVERFLOW); 2150 } 2151 2152 /* Set the new value */ 2153 us->us_epriv_ports[i] = (in_port_t)new_value; 2154 return (0); 2155 } 2156 2157 /* ARGSUSED */ 2158 static int 2159 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2160 cred_t *cr) 2161 { 2162 long new_value; 2163 int i; 2164 udp_t *udp = Q_TO_UDP(q); 2165 udp_stack_t *us = udp->udp_us; 2166 2167 /* 2168 * Fail the request if the new value does not lie within the 2169 * port number limits. 
2170 */ 2171 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2172 new_value <= 0 || new_value >= 65536) { 2173 return (EINVAL); 2174 } 2175 2176 /* Check that the value is already in the list */ 2177 for (i = 0; i < us->us_num_epriv_ports; i++) { 2178 if (us->us_epriv_ports[i] == new_value) 2179 break; 2180 } 2181 if (i == us->us_num_epriv_ports) { 2182 return (ESRCH); 2183 } 2184 2185 /* Clear the value */ 2186 us->us_epriv_ports[i] = 0; 2187 return (0); 2188 } 2189 2190 /* At minimum we need 4 bytes of UDP header */ 2191 #define ICMP_MIN_UDP_HDR 4 2192 2193 /* 2194 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2195 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2196 * Assumes that IP has pulled up everything up to and including the ICMP header. 2197 * An M_CTL could potentially come here from some other module (i.e. if UDP 2198 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2199 * does not have enough ICMP information , following STREAMS conventions, 2200 * we send it upstream assuming it is an M_CTL we don't understand. 2201 */ 2202 static void 2203 udp_icmp_error(queue_t *q, mblk_t *mp) 2204 { 2205 icmph_t *icmph; 2206 ipha_t *ipha; 2207 int iph_hdr_length; 2208 udpha_t *udpha; 2209 sin_t sin; 2210 sin6_t sin6; 2211 mblk_t *mp1; 2212 int error = 0; 2213 size_t mp_size = MBLKL(mp); 2214 udp_t *udp = Q_TO_UDP(q); 2215 2216 /* 2217 * Assume IP provides aligned packets - otherwise toss 2218 */ 2219 if (!OK_32PTR(mp->b_rptr)) { 2220 freemsg(mp); 2221 return; 2222 } 2223 2224 /* 2225 * Verify that we have a complete IP header and the application has 2226 * asked for errors. If not, send it upstream. 2227 */ 2228 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2229 noticmpv4: 2230 putnext(UDP_RD(q), mp); 2231 return; 2232 } 2233 2234 ipha = (ipha_t *)mp->b_rptr; 2235 /* 2236 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2237 * upstream. 
ICMPv6 is handled in udp_icmp_error_ipv6. 2238 */ 2239 switch (IPH_HDR_VERSION(ipha)) { 2240 case IPV6_VERSION: 2241 udp_icmp_error_ipv6(q, mp); 2242 return; 2243 case IPV4_VERSION: 2244 break; 2245 default: 2246 goto noticmpv4; 2247 } 2248 2249 /* Skip past the outer IP and ICMP headers */ 2250 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2251 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2252 /* 2253 * If we don't have the correct outer IP header length or if the ULP 2254 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2255 * send the packet upstream. 2256 */ 2257 if (iph_hdr_length < sizeof (ipha_t) || 2258 ipha->ipha_protocol != IPPROTO_ICMP || 2259 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2260 goto noticmpv4; 2261 } 2262 ipha = (ipha_t *)&icmph[1]; 2263 2264 /* Skip past the inner IP and find the ULP header */ 2265 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2266 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2267 /* 2268 * If we don't have the correct inner IP header length or if the ULP 2269 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2270 * bytes of UDP header, send it upstream. 2271 */ 2272 if (iph_hdr_length < sizeof (ipha_t) || 2273 ipha->ipha_protocol != IPPROTO_UDP || 2274 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2275 goto noticmpv4; 2276 } 2277 2278 switch (icmph->icmph_type) { 2279 case ICMP_DEST_UNREACHABLE: 2280 switch (icmph->icmph_code) { 2281 case ICMP_FRAGMENTATION_NEEDED: 2282 /* 2283 * IP has already adjusted the path MTU. 2284 * XXX Somehow pass MTU indication to application? 
2285 */ 2286 break; 2287 case ICMP_PORT_UNREACHABLE: 2288 case ICMP_PROTOCOL_UNREACHABLE: 2289 error = ECONNREFUSED; 2290 break; 2291 default: 2292 /* Transient errors */ 2293 break; 2294 } 2295 break; 2296 default: 2297 /* Transient errors */ 2298 break; 2299 } 2300 if (error == 0) { 2301 freemsg(mp); 2302 return; 2303 } 2304 2305 switch (udp->udp_family) { 2306 case AF_INET: 2307 sin = sin_null; 2308 sin.sin_family = AF_INET; 2309 sin.sin_addr.s_addr = ipha->ipha_dst; 2310 sin.sin_port = udpha->uha_dst_port; 2311 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2312 error); 2313 break; 2314 case AF_INET6: 2315 sin6 = sin6_null; 2316 sin6.sin6_family = AF_INET6; 2317 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2318 sin6.sin6_port = udpha->uha_dst_port; 2319 2320 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2321 NULL, 0, error); 2322 break; 2323 } 2324 if (mp1) 2325 putnext(UDP_RD(q), mp1); 2326 freemsg(mp); 2327 } 2328 2329 /* 2330 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2331 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2332 * Assumes that IP has pulled up all the extension headers as well as the 2333 * ICMPv6 header. 2334 * An M_CTL could potentially come here from some other module (i.e. if UDP 2335 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2336 * does not have enough ICMP information , following STREAMS conventions, 2337 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2338 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version 2339 * field (when cast to ipha_t in udp_icmp_error). 
2340 */ 2341 static void 2342 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2343 { 2344 icmp6_t *icmp6; 2345 ip6_t *ip6h, *outer_ip6h; 2346 uint16_t hdr_length; 2347 uint8_t *nexthdrp; 2348 udpha_t *udpha; 2349 sin6_t sin6; 2350 mblk_t *mp1; 2351 int error = 0; 2352 size_t mp_size = MBLKL(mp); 2353 udp_t *udp = Q_TO_UDP(q); 2354 2355 /* 2356 * Verify that we have a complete IP header. If not, send it upstream. 2357 */ 2358 if (mp_size < sizeof (ip6_t)) { 2359 noticmpv6: 2360 putnext(UDP_RD(q), mp); 2361 return; 2362 } 2363 2364 outer_ip6h = (ip6_t *)mp->b_rptr; 2365 /* 2366 * Verify this is an ICMPV6 packet, else send it upstream 2367 */ 2368 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2369 hdr_length = IPV6_HDR_LEN; 2370 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2371 &nexthdrp) || 2372 *nexthdrp != IPPROTO_ICMPV6) { 2373 goto noticmpv6; 2374 } 2375 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2376 ip6h = (ip6_t *)&icmp6[1]; 2377 /* 2378 * Verify we have a complete ICMP and inner IP header. 2379 */ 2380 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2381 goto noticmpv6; 2382 2383 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2384 goto noticmpv6; 2385 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2386 /* 2387 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2388 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2389 * packet upstream. 
2390 */ 2391 if ((*nexthdrp != IPPROTO_UDP) || 2392 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2393 goto noticmpv6; 2394 } 2395 2396 switch (icmp6->icmp6_type) { 2397 case ICMP6_DST_UNREACH: 2398 switch (icmp6->icmp6_code) { 2399 case ICMP6_DST_UNREACH_NOPORT: 2400 error = ECONNREFUSED; 2401 break; 2402 case ICMP6_DST_UNREACH_ADMIN: 2403 case ICMP6_DST_UNREACH_NOROUTE: 2404 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2405 case ICMP6_DST_UNREACH_ADDR: 2406 /* Transient errors */ 2407 break; 2408 default: 2409 break; 2410 } 2411 break; 2412 case ICMP6_PACKET_TOO_BIG: { 2413 struct T_unitdata_ind *tudi; 2414 struct T_opthdr *toh; 2415 size_t udi_size; 2416 mblk_t *newmp; 2417 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2418 sizeof (struct ip6_mtuinfo); 2419 sin6_t *sin6; 2420 struct ip6_mtuinfo *mtuinfo; 2421 2422 /* 2423 * If the application has requested to receive path mtu 2424 * information, send up an empty message containing an 2425 * IPV6_PATHMTU ancillary data item. 2426 */ 2427 if (!udp->udp_ipv6_recvpathmtu) 2428 break; 2429 2430 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2431 opt_length; 2432 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2433 BUMP_MIB(&udp->udp_mib, udpInErrors); 2434 break; 2435 } 2436 2437 /* 2438 * newmp->b_cont is left to NULL on purpose. This is an 2439 * empty message containing only ancillary data. 
2440 */ 2441 newmp->b_datap->db_type = M_PROTO; 2442 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2443 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2444 tudi->PRIM_type = T_UNITDATA_IND; 2445 tudi->SRC_length = sizeof (sin6_t); 2446 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2447 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2448 tudi->OPT_length = opt_length; 2449 2450 sin6 = (sin6_t *)&tudi[1]; 2451 bzero(sin6, sizeof (sin6_t)); 2452 sin6->sin6_family = AF_INET6; 2453 sin6->sin6_addr = udp->udp_v6dst; 2454 2455 toh = (struct T_opthdr *)&sin6[1]; 2456 toh->level = IPPROTO_IPV6; 2457 toh->name = IPV6_PATHMTU; 2458 toh->len = opt_length; 2459 toh->status = 0; 2460 2461 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2462 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2463 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2464 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2465 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2466 /* 2467 * We've consumed everything we need from the original 2468 * message. Free it, then send our empty message. 2469 */ 2470 freemsg(mp); 2471 putnext(UDP_RD(q), newmp); 2472 return; 2473 } 2474 case ICMP6_TIME_EXCEEDED: 2475 /* Transient errors */ 2476 break; 2477 case ICMP6_PARAM_PROB: 2478 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2479 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2480 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2481 (uchar_t *)nexthdrp) { 2482 error = ECONNREFUSED; 2483 break; 2484 } 2485 break; 2486 } 2487 if (error == 0) { 2488 freemsg(mp); 2489 return; 2490 } 2491 2492 sin6 = sin6_null; 2493 sin6.sin6_family = AF_INET6; 2494 sin6.sin6_addr = ip6h->ip6_dst; 2495 sin6.sin6_port = udpha->uha_dst_port; 2496 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2497 2498 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2499 error); 2500 if (mp1) 2501 putnext(UDP_RD(q), mp1); 2502 freemsg(mp); 2503 } 2504 2505 /* 2506 * This routine responds to T_ADDR_REQ messages. 
It is called by udp_wput. 2507 * The local address is filled in if endpoint is bound. The remote address 2508 * is filled in if remote address has been precified ("connected endpoint") 2509 * (The concept of connected CLTS sockets is alien to published TPI 2510 * but we support it anyway). 2511 */ 2512 static void 2513 udp_addr_req(queue_t *q, mblk_t *mp) 2514 { 2515 sin_t *sin; 2516 sin6_t *sin6; 2517 mblk_t *ackmp; 2518 struct T_addr_ack *taa; 2519 udp_t *udp = Q_TO_UDP(q); 2520 2521 /* Make it large enough for worst case */ 2522 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2523 2 * sizeof (sin6_t), 1); 2524 if (ackmp == NULL) { 2525 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2526 return; 2527 } 2528 taa = (struct T_addr_ack *)ackmp->b_rptr; 2529 2530 bzero(taa, sizeof (struct T_addr_ack)); 2531 ackmp->b_wptr = (uchar_t *)&taa[1]; 2532 2533 taa->PRIM_type = T_ADDR_ACK; 2534 ackmp->b_datap->db_type = M_PCPROTO; 2535 /* 2536 * Note: Following code assumes 32 bit alignment of basic 2537 * data structures like sin_t and struct T_addr_ack. 2538 */ 2539 if (udp->udp_state != TS_UNBND) { 2540 /* 2541 * Fill in local address first 2542 */ 2543 taa->LOCADDR_offset = sizeof (*taa); 2544 if (udp->udp_family == AF_INET) { 2545 taa->LOCADDR_length = sizeof (sin_t); 2546 sin = (sin_t *)&taa[1]; 2547 /* Fill zeroes and then initialize non-zero fields */ 2548 *sin = sin_null; 2549 sin->sin_family = AF_INET; 2550 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2551 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2552 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2553 sin->sin_addr.s_addr); 2554 } else { 2555 /* 2556 * INADDR_ANY 2557 * udp_v6src is not set, we might be bound to 2558 * broadcast/multicast. 
Use udp_bound_v6src as 2559 * local address instead (that could 2560 * also still be INADDR_ANY) 2561 */ 2562 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2563 sin->sin_addr.s_addr); 2564 } 2565 sin->sin_port = udp->udp_port; 2566 ackmp->b_wptr = (uchar_t *)&sin[1]; 2567 if (udp->udp_state == TS_DATA_XFER) { 2568 /* 2569 * connected, fill remote address too 2570 */ 2571 taa->REMADDR_length = sizeof (sin_t); 2572 /* assumed 32-bit alignment */ 2573 taa->REMADDR_offset = taa->LOCADDR_offset + 2574 taa->LOCADDR_length; 2575 2576 sin = (sin_t *)(ackmp->b_rptr + 2577 taa->REMADDR_offset); 2578 /* initialize */ 2579 *sin = sin_null; 2580 sin->sin_family = AF_INET; 2581 sin->sin_addr.s_addr = 2582 V4_PART_OF_V6(udp->udp_v6dst); 2583 sin->sin_port = udp->udp_dstport; 2584 ackmp->b_wptr = (uchar_t *)&sin[1]; 2585 } 2586 } else { 2587 taa->LOCADDR_length = sizeof (sin6_t); 2588 sin6 = (sin6_t *)&taa[1]; 2589 /* Fill zeroes and then initialize non-zero fields */ 2590 *sin6 = sin6_null; 2591 sin6->sin6_family = AF_INET6; 2592 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2593 sin6->sin6_addr = udp->udp_v6src; 2594 } else { 2595 /* 2596 * UNSPECIFIED 2597 * udp_v6src is not set, we might be bound to 2598 * broadcast/multicast. 
Use udp_bound_v6src as 2599 * local address instead (that could 2600 * also still be UNSPECIFIED) 2601 */ 2602 sin6->sin6_addr = 2603 udp->udp_bound_v6src; 2604 } 2605 sin6->sin6_port = udp->udp_port; 2606 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2607 if (udp->udp_state == TS_DATA_XFER) { 2608 /* 2609 * connected, fill remote address too 2610 */ 2611 taa->REMADDR_length = sizeof (sin6_t); 2612 /* assumed 32-bit alignment */ 2613 taa->REMADDR_offset = taa->LOCADDR_offset + 2614 taa->LOCADDR_length; 2615 2616 sin6 = (sin6_t *)(ackmp->b_rptr + 2617 taa->REMADDR_offset); 2618 /* initialize */ 2619 *sin6 = sin6_null; 2620 sin6->sin6_family = AF_INET6; 2621 sin6->sin6_addr = udp->udp_v6dst; 2622 sin6->sin6_port = udp->udp_dstport; 2623 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2624 } 2625 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2626 } 2627 } 2628 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2629 putnext(UDP_RD(q), ackmp); 2630 } 2631 2632 static void 2633 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2634 { 2635 if (udp->udp_family == AF_INET) { 2636 *tap = udp_g_t_info_ack_ipv4; 2637 } else { 2638 *tap = udp_g_t_info_ack_ipv6; 2639 } 2640 tap->CURRENT_state = udp->udp_state; 2641 tap->OPT_size = udp_max_optsize; 2642 } 2643 2644 /* 2645 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2646 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2647 * udp_g_t_info_ack. The current state of the stream is copied from 2648 * udp_state. 
2649 */ 2650 static void 2651 udp_capability_req(queue_t *q, mblk_t *mp) 2652 { 2653 t_uscalar_t cap_bits1; 2654 struct T_capability_ack *tcap; 2655 udp_t *udp = Q_TO_UDP(q); 2656 2657 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2658 2659 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2660 mp->b_datap->db_type, T_CAPABILITY_ACK); 2661 if (!mp) 2662 return; 2663 2664 tcap = (struct T_capability_ack *)mp->b_rptr; 2665 tcap->CAP_bits1 = 0; 2666 2667 if (cap_bits1 & TC1_INFO) { 2668 udp_copy_info(&tcap->INFO_ack, udp); 2669 tcap->CAP_bits1 |= TC1_INFO; 2670 } 2671 2672 putnext(UDP_RD(q), mp); 2673 } 2674 2675 /* 2676 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2677 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2678 * The current state of the stream is copied from udp_state. 2679 */ 2680 static void 2681 udp_info_req(queue_t *q, mblk_t *mp) 2682 { 2683 udp_t *udp = Q_TO_UDP(q); 2684 2685 /* Create a T_INFO_ACK message. */ 2686 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2687 T_INFO_ACK); 2688 if (!mp) 2689 return; 2690 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2691 putnext(UDP_RD(q), mp); 2692 } 2693 2694 /* 2695 * IP recognizes seven kinds of bind requests: 2696 * 2697 * - A zero-length address binds only to the protocol number. 2698 * 2699 * - A 4-byte address is treated as a request to 2700 * validate that the address is a valid local IPv4 2701 * address, appropriate for an application to bind to. 2702 * IP does the verification, but does not make any note 2703 * of the address at this time. 2704 * 2705 * - A 16-byte address contains is treated as a request 2706 * to validate a local IPv6 address, as the 4-byte 2707 * address case above. 2708 * 2709 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2710 * use it for the inbound fanout of packets. 
2711 * 2712 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2713 * use it for the inbound fanout of packets. 2714 * 2715 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2716 * information consisting of local and remote addresses 2717 * and ports. In this case, the addresses are both 2718 * validated as appropriate for this operation, and, if 2719 * so, the information is retained for use in the 2720 * inbound fanout. 2721 * 2722 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2723 * fanout information, like the 12-byte case above. 2724 * 2725 * IP will also fill in the IRE request mblk with information 2726 * regarding our peer. In all cases, we notify IP of our protocol 2727 * type by appending a single protocol byte to the bind request. 2728 */ 2729 static mblk_t * 2730 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2731 { 2732 char *cp; 2733 mblk_t *mp; 2734 struct T_bind_req *tbr; 2735 ipa_conn_t *ac; 2736 ipa6_conn_t *ac6; 2737 sin_t *sin; 2738 sin6_t *sin6; 2739 2740 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2741 2742 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2743 if (!mp) 2744 return (mp); 2745 mp->b_datap->db_type = M_PROTO; 2746 tbr = (struct T_bind_req *)mp->b_rptr; 2747 tbr->PRIM_type = bind_prim; 2748 tbr->ADDR_offset = sizeof (*tbr); 2749 tbr->CONIND_number = 0; 2750 tbr->ADDR_length = addr_length; 2751 cp = (char *)&tbr[1]; 2752 switch (addr_length) { 2753 case sizeof (ipa_conn_t): 2754 ASSERT(udp->udp_family == AF_INET); 2755 /* Append a request for an IRE */ 2756 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2757 if (!mp->b_cont) { 2758 freemsg(mp); 2759 return (NULL); 2760 } 2761 mp->b_cont->b_wptr += sizeof (ire_t); 2762 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2763 2764 /* cp known to be 32 bit aligned */ 2765 ac = (ipa_conn_t *)cp; 2766 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2767 ac->ac_faddr = 
V4_PART_OF_V6(udp->udp_v6dst); 2768 ac->ac_fport = udp->udp_dstport; 2769 ac->ac_lport = udp->udp_port; 2770 break; 2771 2772 case sizeof (ipa6_conn_t): 2773 ASSERT(udp->udp_family == AF_INET6); 2774 /* Append a request for an IRE */ 2775 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2776 if (!mp->b_cont) { 2777 freemsg(mp); 2778 return (NULL); 2779 } 2780 mp->b_cont->b_wptr += sizeof (ire_t); 2781 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2782 2783 /* cp known to be 32 bit aligned */ 2784 ac6 = (ipa6_conn_t *)cp; 2785 ac6->ac6_laddr = udp->udp_v6src; 2786 ac6->ac6_faddr = udp->udp_v6dst; 2787 ac6->ac6_fport = udp->udp_dstport; 2788 ac6->ac6_lport = udp->udp_port; 2789 break; 2790 2791 case sizeof (sin_t): 2792 ASSERT(udp->udp_family == AF_INET); 2793 /* Append a request for an IRE */ 2794 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2795 if (!mp->b_cont) { 2796 freemsg(mp); 2797 return (NULL); 2798 } 2799 mp->b_cont->b_wptr += sizeof (ire_t); 2800 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2801 2802 sin = (sin_t *)cp; 2803 *sin = sin_null; 2804 sin->sin_family = AF_INET; 2805 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2806 sin->sin_port = udp->udp_port; 2807 break; 2808 2809 case sizeof (sin6_t): 2810 ASSERT(udp->udp_family == AF_INET6); 2811 /* Append a request for an IRE */ 2812 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2813 if (!mp->b_cont) { 2814 freemsg(mp); 2815 return (NULL); 2816 } 2817 mp->b_cont->b_wptr += sizeof (ire_t); 2818 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2819 2820 sin6 = (sin6_t *)cp; 2821 *sin6 = sin6_null; 2822 sin6->sin6_family = AF_INET6; 2823 sin6->sin6_addr = udp->udp_bound_v6src; 2824 sin6->sin6_port = udp->udp_port; 2825 break; 2826 } 2827 /* Add protocol number to end */ 2828 cp[addr_length] = (char)IPPROTO_UDP; 2829 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2830 return (mp); 2831 } 2832 2833 /* 2834 * This is the open routine for udp. 
It allocates a udp_t structure for 2835 * the stream and, on the first open of the module, creates an ND table. 2836 */ 2837 /* ARGSUSED */ 2838 static int 2839 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2840 { 2841 int err; 2842 udp_t *udp; 2843 conn_t *connp; 2844 queue_t *ip_wq; 2845 zoneid_t zoneid; 2846 netstack_t *ns; 2847 udp_stack_t *us; 2848 2849 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2850 2851 /* If the stream is already open, return immediately. */ 2852 if (q->q_ptr != NULL) 2853 return (0); 2854 2855 /* If this is not a push of udp as a module, fail. */ 2856 if (sflag != MODOPEN) 2857 return (EINVAL); 2858 2859 ns = netstack_find_by_cred(credp); 2860 ASSERT(ns != NULL); 2861 us = ns->netstack_udp; 2862 ASSERT(us != NULL); 2863 2864 /* 2865 * For exclusive stacks we set the zoneid to zero 2866 * to make UDP operate as if in the global zone. 2867 */ 2868 if (us->us_netstack->netstack_stackid != GLOBAL_NETSTACKID) 2869 zoneid = GLOBAL_ZONEID; 2870 else 2871 zoneid = crgetzoneid(credp); 2872 2873 q->q_hiwat = us->us_recv_hiwat; 2874 WR(q)->q_hiwat = us->us_xmit_hiwat; 2875 WR(q)->q_lowat = us->us_xmit_lowat; 2876 2877 /* Insert ourselves in the stream since we're about to walk q_next */ 2878 qprocson(q); 2879 2880 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2881 bzero(udp, sizeof (*udp)); 2882 2883 /* 2884 * UDP is supported only as a module and it has to be pushed directly 2885 * above the device instance of IP. If UDP is pushed anywhere else 2886 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2887 * sake of MIB browsers and fail everything else. 
2888 */ 2889 ip_wq = WR(q)->q_next; 2890 if (NOT_OVER_IP(ip_wq)) { 2891 /* Support just SNMP for MIB browsers */ 2892 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP, 2893 us->us_netstack); 2894 connp->conn_rq = q; 2895 connp->conn_wq = WR(q); 2896 connp->conn_flags |= IPCL_UDPMOD; 2897 connp->conn_cred = credp; 2898 connp->conn_zoneid = zoneid; 2899 connp->conn_udp = udp; 2900 udp->udp_us = us; 2901 udp->udp_connp = connp; 2902 q->q_ptr = WR(q)->q_ptr = connp; 2903 crhold(credp); 2904 q->q_qinfo = &udp_snmp_rinit; 2905 WR(q)->q_qinfo = &udp_snmp_winit; 2906 return (0); 2907 } 2908 2909 /* 2910 * Initialize the udp_t structure for this stream. 2911 */ 2912 q = RD(ip_wq); 2913 connp = Q_TO_CONN(q); 2914 mutex_enter(&connp->conn_lock); 2915 connp->conn_proto = IPPROTO_UDP; 2916 connp->conn_flags |= IPCL_UDP; 2917 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2918 connp->conn_udp = udp; 2919 2920 /* Set the initial state of the stream and the privilege status. */ 2921 udp->udp_connp = connp; 2922 udp->udp_state = TS_UNBND; 2923 udp->udp_mode = UDP_MT_HOT; 2924 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2925 udp->udp_family = AF_INET6; 2926 udp->udp_ipversion = IPV6_VERSION; 2927 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2928 udp->udp_ttl = us->us_ipv6_hoplimit; 2929 connp->conn_af_isv6 = B_TRUE; 2930 connp->conn_flags |= IPCL_ISV6; 2931 } else { 2932 udp->udp_family = AF_INET; 2933 udp->udp_ipversion = IPV4_VERSION; 2934 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2935 udp->udp_ttl = us->us_ipv4_ttl; 2936 connp->conn_af_isv6 = B_FALSE; 2937 connp->conn_flags &= ~IPCL_ISV6; 2938 } 2939 2940 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2941 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2942 connp->conn_zoneid = zoneid; 2943 2944 udp->udp_open_time = lbolt64; 2945 udp->udp_open_pid = curproc->p_pid; 2946 2947 /* 2948 * If the caller has the process-wide flag set, then default to MAC 2949 * exempt mode. 
This allows read-down to unlabeled hosts. 2950 */ 2951 if (getpflags(NET_MAC_AWARE, credp) != 0) 2952 udp->udp_mac_exempt = B_TRUE; 2953 2954 if (connp->conn_flags & IPCL_SOCKET) { 2955 udp->udp_issocket = B_TRUE; 2956 udp->udp_direct_sockfs = B_TRUE; 2957 } 2958 2959 connp->conn_ulp_labeled = is_system_labeled(); 2960 2961 mutex_exit(&connp->conn_lock); 2962 udp->udp_us = us; 2963 2964 /* 2965 * The transmit hiwat/lowat is only looked at on IP's queue. 2966 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2967 * getsockopts. 2968 */ 2969 q->q_hiwat = us->us_recv_hiwat; 2970 WR(q)->q_hiwat = us->us_xmit_hiwat; 2971 WR(q)->q_lowat = us->us_xmit_lowat; 2972 2973 if (udp->udp_family == AF_INET6) { 2974 /* Build initial header template for transmit */ 2975 if ((err = udp_build_hdrs(q, udp)) != 0) { 2976 /* XXX missing free of connp? crfree? netstack_rele? */ 2977 qprocsoff(UDP_RD(q)); 2978 udp->udp_connp = NULL; 2979 connp->conn_udp = NULL; 2980 kmem_cache_free(udp_cache, udp); 2981 return (err); 2982 } 2983 } 2984 2985 /* Set the Stream head write offset and high watermark. */ 2986 (void) mi_set_sth_wroff(UDP_RD(q), 2987 udp->udp_max_hdr_len + us->us_wroff_extra); 2988 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 2989 2990 return (0); 2991 } 2992 2993 /* 2994 * Which UDP options OK to set through T_UNITDATA_REQ... 
2995 */ 2996 /* ARGSUSED */ 2997 static boolean_t 2998 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2999 { 3000 return (B_TRUE); 3001 } 3002 3003 /* 3004 * This routine gets default values of certain options whose default 3005 * values are maintained by protcol specific code 3006 */ 3007 /* ARGSUSED */ 3008 int 3009 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 3010 { 3011 udp_t *udp = Q_TO_UDP(q); 3012 udp_stack_t *us = udp->udp_us; 3013 int *i1 = (int *)ptr; 3014 3015 switch (level) { 3016 case IPPROTO_IP: 3017 switch (name) { 3018 case IP_MULTICAST_TTL: 3019 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 3020 return (sizeof (uchar_t)); 3021 case IP_MULTICAST_LOOP: 3022 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 3023 return (sizeof (uchar_t)); 3024 } 3025 break; 3026 case IPPROTO_IPV6: 3027 switch (name) { 3028 case IPV6_MULTICAST_HOPS: 3029 *i1 = IP_DEFAULT_MULTICAST_TTL; 3030 return (sizeof (int)); 3031 case IPV6_MULTICAST_LOOP: 3032 *i1 = IP_DEFAULT_MULTICAST_LOOP; 3033 return (sizeof (int)); 3034 case IPV6_UNICAST_HOPS: 3035 *i1 = us->us_ipv6_hoplimit; 3036 return (sizeof (int)); 3037 } 3038 break; 3039 } 3040 return (-1); 3041 } 3042 3043 /* 3044 * This routine retrieves the current status of socket options 3045 * and expects the caller to pass in the queue pointer of the 3046 * upper instance. It returns the size of the option retrieved. 
3047 */ 3048 int 3049 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 3050 { 3051 int *i1 = (int *)ptr; 3052 conn_t *connp; 3053 udp_t *udp; 3054 ip6_pkt_t *ipp; 3055 int len; 3056 udp_stack_t *us; 3057 3058 q = UDP_WR(q); 3059 connp = Q_TO_CONN(q); 3060 udp = connp->conn_udp; 3061 ipp = &udp->udp_sticky_ipp; 3062 us = udp->udp_us; 3063 3064 switch (level) { 3065 case SOL_SOCKET: 3066 switch (name) { 3067 case SO_DEBUG: 3068 *i1 = udp->udp_debug; 3069 break; /* goto sizeof (int) option return */ 3070 case SO_REUSEADDR: 3071 *i1 = udp->udp_reuseaddr; 3072 break; /* goto sizeof (int) option return */ 3073 case SO_TYPE: 3074 *i1 = SOCK_DGRAM; 3075 break; /* goto sizeof (int) option return */ 3076 3077 /* 3078 * The following three items are available here, 3079 * but are only meaningful to IP. 3080 */ 3081 case SO_DONTROUTE: 3082 *i1 = udp->udp_dontroute; 3083 break; /* goto sizeof (int) option return */ 3084 case SO_USELOOPBACK: 3085 *i1 = udp->udp_useloopback; 3086 break; /* goto sizeof (int) option return */ 3087 case SO_BROADCAST: 3088 *i1 = udp->udp_broadcast; 3089 break; /* goto sizeof (int) option return */ 3090 3091 case SO_SNDBUF: 3092 *i1 = q->q_hiwat; 3093 break; /* goto sizeof (int) option return */ 3094 case SO_RCVBUF: 3095 *i1 = RD(q)->q_hiwat; 3096 break; /* goto sizeof (int) option return */ 3097 case SO_DGRAM_ERRIND: 3098 *i1 = udp->udp_dgram_errind; 3099 break; /* goto sizeof (int) option return */ 3100 case SO_RECVUCRED: 3101 *i1 = udp->udp_recvucred; 3102 break; /* goto sizeof (int) option return */ 3103 case SO_TIMESTAMP: 3104 *i1 = udp->udp_timestamp; 3105 break; /* goto sizeof (int) option return */ 3106 case SO_ANON_MLP: 3107 *i1 = udp->udp_anon_mlp; 3108 break; /* goto sizeof (int) option return */ 3109 case SO_MAC_EXEMPT: 3110 *i1 = udp->udp_mac_exempt; 3111 break; /* goto sizeof (int) option return */ 3112 case SO_ALLZONES: 3113 *i1 = connp->conn_allzones; 3114 break; /* goto sizeof (int) option return */ 3115 case 
SO_EXCLBIND: 3116 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 3117 break; 3118 case SO_PROTOTYPE: 3119 *i1 = IPPROTO_UDP; 3120 break; 3121 case SO_DOMAIN: 3122 *i1 = udp->udp_family; 3123 break; 3124 default: 3125 return (-1); 3126 } 3127 break; 3128 case IPPROTO_IP: 3129 if (udp->udp_family != AF_INET) 3130 return (-1); 3131 switch (name) { 3132 case IP_OPTIONS: 3133 case T_IP_OPTIONS: 3134 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 3135 if (len > 0) { 3136 bcopy(udp->udp_ip_rcv_options + 3137 udp->udp_label_len, ptr, len); 3138 } 3139 return (len); 3140 case IP_TOS: 3141 case T_IP_TOS: 3142 *i1 = (int)udp->udp_type_of_service; 3143 break; /* goto sizeof (int) option return */ 3144 case IP_TTL: 3145 *i1 = (int)udp->udp_ttl; 3146 break; /* goto sizeof (int) option return */ 3147 case IP_NEXTHOP: 3148 case IP_RECVPKTINFO: 3149 /* 3150 * This also handles IP_PKTINFO. 3151 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 3152 * Differentiation is based on the size of the argument 3153 * passed in. 3154 * This option is handled in IP which will return an 3155 * error for IP_PKTINFO as it's not supported as a 3156 * sticky option. 
3157 */ 3158 return (-EINVAL); 3159 case IP_MULTICAST_IF: 3160 /* 0 address if not set */ 3161 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 3162 return (sizeof (ipaddr_t)); 3163 case IP_MULTICAST_TTL: 3164 *(uchar_t *)ptr = udp->udp_multicast_ttl; 3165 return (sizeof (uchar_t)); 3166 case IP_MULTICAST_LOOP: 3167 *ptr = connp->conn_multicast_loop; 3168 return (sizeof (uint8_t)); 3169 case IP_RECVOPTS: 3170 *i1 = udp->udp_recvopts; 3171 break; /* goto sizeof (int) option return */ 3172 case IP_RECVDSTADDR: 3173 *i1 = udp->udp_recvdstaddr; 3174 break; /* goto sizeof (int) option return */ 3175 case IP_RECVIF: 3176 *i1 = udp->udp_recvif; 3177 break; /* goto sizeof (int) option return */ 3178 case IP_RECVSLLA: 3179 *i1 = udp->udp_recvslla; 3180 break; /* goto sizeof (int) option return */ 3181 case IP_RECVTTL: 3182 *i1 = udp->udp_recvttl; 3183 break; /* goto sizeof (int) option return */ 3184 case IP_ADD_MEMBERSHIP: 3185 case IP_DROP_MEMBERSHIP: 3186 case IP_BLOCK_SOURCE: 3187 case IP_UNBLOCK_SOURCE: 3188 case IP_ADD_SOURCE_MEMBERSHIP: 3189 case IP_DROP_SOURCE_MEMBERSHIP: 3190 case MCAST_JOIN_GROUP: 3191 case MCAST_LEAVE_GROUP: 3192 case MCAST_BLOCK_SOURCE: 3193 case MCAST_UNBLOCK_SOURCE: 3194 case MCAST_JOIN_SOURCE_GROUP: 3195 case MCAST_LEAVE_SOURCE_GROUP: 3196 case IP_DONTFAILOVER_IF: 3197 /* cannot "get" the value for these */ 3198 return (-1); 3199 case IP_BOUND_IF: 3200 /* Zero if not set */ 3201 *i1 = udp->udp_bound_if; 3202 break; /* goto sizeof (int) option return */ 3203 case IP_UNSPEC_SRC: 3204 *i1 = udp->udp_unspec_source; 3205 break; /* goto sizeof (int) option return */ 3206 case IP_XMIT_IF: 3207 *i1 = udp->udp_xmit_if; 3208 break; /* goto sizeof (int) option return */ 3209 default: 3210 return (-1); 3211 } 3212 break; 3213 case IPPROTO_IPV6: 3214 if (udp->udp_family != AF_INET6) 3215 return (-1); 3216 switch (name) { 3217 case IPV6_UNICAST_HOPS: 3218 *i1 = (unsigned int)udp->udp_ttl; 3219 break; /* goto sizeof (int) option return */ 3220 case 
IPV6_MULTICAST_IF: 3221 /* 0 index if not set */ 3222 *i1 = udp->udp_multicast_if_index; 3223 break; /* goto sizeof (int) option return */ 3224 case IPV6_MULTICAST_HOPS: 3225 *i1 = udp->udp_multicast_ttl; 3226 break; /* goto sizeof (int) option return */ 3227 case IPV6_MULTICAST_LOOP: 3228 *i1 = connp->conn_multicast_loop; 3229 break; /* goto sizeof (int) option return */ 3230 case IPV6_JOIN_GROUP: 3231 case IPV6_LEAVE_GROUP: 3232 case MCAST_JOIN_GROUP: 3233 case MCAST_LEAVE_GROUP: 3234 case MCAST_BLOCK_SOURCE: 3235 case MCAST_UNBLOCK_SOURCE: 3236 case MCAST_JOIN_SOURCE_GROUP: 3237 case MCAST_LEAVE_SOURCE_GROUP: 3238 /* cannot "get" the value for these */ 3239 return (-1); 3240 case IPV6_BOUND_IF: 3241 /* Zero if not set */ 3242 *i1 = udp->udp_bound_if; 3243 break; /* goto sizeof (int) option return */ 3244 case IPV6_UNSPEC_SRC: 3245 *i1 = udp->udp_unspec_source; 3246 break; /* goto sizeof (int) option return */ 3247 case IPV6_RECVPKTINFO: 3248 *i1 = udp->udp_ip_recvpktinfo; 3249 break; /* goto sizeof (int) option return */ 3250 case IPV6_RECVTCLASS: 3251 *i1 = udp->udp_ipv6_recvtclass; 3252 break; /* goto sizeof (int) option return */ 3253 case IPV6_RECVPATHMTU: 3254 *i1 = udp->udp_ipv6_recvpathmtu; 3255 break; /* goto sizeof (int) option return */ 3256 case IPV6_RECVHOPLIMIT: 3257 *i1 = udp->udp_ipv6_recvhoplimit; 3258 break; /* goto sizeof (int) option return */ 3259 case IPV6_RECVHOPOPTS: 3260 *i1 = udp->udp_ipv6_recvhopopts; 3261 break; /* goto sizeof (int) option return */ 3262 case IPV6_RECVDSTOPTS: 3263 *i1 = udp->udp_ipv6_recvdstopts; 3264 break; /* goto sizeof (int) option return */ 3265 case _OLD_IPV6_RECVDSTOPTS: 3266 *i1 = udp->udp_old_ipv6_recvdstopts; 3267 break; /* goto sizeof (int) option return */ 3268 case IPV6_RECVRTHDRDSTOPTS: 3269 *i1 = udp->udp_ipv6_recvrthdrdstopts; 3270 break; /* goto sizeof (int) option return */ 3271 case IPV6_RECVRTHDR: 3272 *i1 = udp->udp_ipv6_recvrthdr; 3273 break; /* goto sizeof (int) option return */ 3274 case 
IPV6_PKTINFO: { 3275 /* XXX assumes that caller has room for max size! */ 3276 struct in6_pktinfo *pkti; 3277 3278 pkti = (struct in6_pktinfo *)ptr; 3279 if (ipp->ipp_fields & IPPF_IFINDEX) 3280 pkti->ipi6_ifindex = ipp->ipp_ifindex; 3281 else 3282 pkti->ipi6_ifindex = 0; 3283 if (ipp->ipp_fields & IPPF_ADDR) 3284 pkti->ipi6_addr = ipp->ipp_addr; 3285 else 3286 pkti->ipi6_addr = ipv6_all_zeros; 3287 return (sizeof (struct in6_pktinfo)); 3288 } 3289 case IPV6_TCLASS: 3290 if (ipp->ipp_fields & IPPF_TCLASS) 3291 *i1 = ipp->ipp_tclass; 3292 else 3293 *i1 = IPV6_FLOW_TCLASS( 3294 IPV6_DEFAULT_VERS_AND_FLOW); 3295 break; /* goto sizeof (int) option return */ 3296 case IPV6_NEXTHOP: { 3297 sin6_t *sin6 = (sin6_t *)ptr; 3298 3299 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 3300 return (0); 3301 *sin6 = sin6_null; 3302 sin6->sin6_family = AF_INET6; 3303 sin6->sin6_addr = ipp->ipp_nexthop; 3304 return (sizeof (sin6_t)); 3305 } 3306 case IPV6_HOPOPTS: 3307 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 3308 return (0); 3309 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 3310 return (0); 3311 /* 3312 * The cipso/label option is added by kernel. 3313 * User is not usually aware of this option. 3314 * We copy out the hbh opt after the label option. 
3315 */ 3316 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 3317 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 3318 if (udp->udp_label_len_v6 > 0) { 3319 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 3320 ptr[1] = (ipp->ipp_hopoptslen - 3321 udp->udp_label_len_v6 + 7) / 8 - 1; 3322 } 3323 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 3324 case IPV6_RTHDRDSTOPTS: 3325 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 3326 return (0); 3327 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 3328 return (ipp->ipp_rtdstoptslen); 3329 case IPV6_RTHDR: 3330 if (!(ipp->ipp_fields & IPPF_RTHDR)) 3331 return (0); 3332 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 3333 return (ipp->ipp_rthdrlen); 3334 case IPV6_DSTOPTS: 3335 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 3336 return (0); 3337 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 3338 return (ipp->ipp_dstoptslen); 3339 case IPV6_PATHMTU: 3340 return (ip_fill_mtuinfo(&udp->udp_v6dst, 3341 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 3342 us->us_netstack)); 3343 default: 3344 return (-1); 3345 } 3346 break; 3347 case IPPROTO_UDP: 3348 switch (name) { 3349 case UDP_ANONPRIVBIND: 3350 *i1 = udp->udp_anon_priv_bind; 3351 break; 3352 case UDP_EXCLBIND: 3353 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 3354 break; 3355 case UDP_RCVHDR: 3356 *i1 = udp->udp_rcvhdr ? 1 : 0; 3357 break; 3358 case UDP_NAT_T_ENDPOINT: 3359 *i1 = udp->udp_nat_t_endpoint; 3360 break; 3361 default: 3362 return (-1); 3363 } 3364 break; 3365 default: 3366 return (-1); 3367 } 3368 return (sizeof (int)); 3369 } 3370 3371 /* 3372 * This routine sets socket options; it expects the caller 3373 * to pass in the queue pointer of the upper instance. 
 */
/* ARGSUSED */
int
udp_opt_set(queue_t *q, uint_t optset_context, int level,
    int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
    uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
{
	/*
	 * Return conventions used throughout this function:
	 *  - 0: success; *outlenp is set to inlen and, when invalp and
	 *    outvalp differ, the input value is copied to outvalp.
	 *  - positive errno: hard failure.  Most (not all) of these paths
	 *    zero *outlenp first.
	 *  - -EINVAL: "soft" error, the option is not handled at this
	 *    level; *outlenp is left untouched.
	 *
	 * 'attrs' carries per-datagram attributes when non-NULL; when it is
	 * NULL (or has no IPv6 packet info) IPv6 options are applied to the
	 * endpoint's sticky options instead.
	 */
	udpattrs_t *attrs = thisdg_attrs;
	int	*i1 = (int *)invalp;
	/* NOTE(review): invalp is read here, before inlen is examined. */
	boolean_t onoff = (*i1 == 0) ? 0 : 1;
	boolean_t checkonly;
	int	error;
	conn_t	*connp;
	udp_t	*udp;
	uint_t	newlen;
	udp_stack_t *us;

	q = UDP_WR(q);
	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	us = udp->udp_us;

	/* Decide from the calling context whether state may be modified. */
	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		checkonly = B_TRUE;
		/*
		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
		 * inlen != 0 implies value supplied and
		 * 	we have to "pretend" to set it.
		 * inlen == 0 implies that there is no
		 * 	value part in T_CHECK request and just validation
		 * done elsewhere should be enough, we just return here.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			return (0);
		}
		break;
	case SETFN_OPTCOM_NEGOTIATE:
		checkonly = B_FALSE;
		break;
	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		checkonly = B_FALSE;
		/*
		 * Negotiating local and "association-related" options
		 * through T_UNITDATA_REQ.
		 *
		 * Following routine can filter out ones we do not
		 * want to be "set" this way.
		 */
		if (!udp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		/*
		 * We should never get here
		 */
		*outlenp = 0;
		return (EINVAL);
	}

	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	/*
	 * For fixed length options, no sanity check
	 * of passed in length is done. It is assumed *_optcom_req()
	 * routines do the right thing.
	 */

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_REUSEADDR:
			if (!checkonly)
				udp->udp_reuseaddr = onoff;
			break;
		case SO_DEBUG:
			if (!checkonly)
				udp->udp_debug = onoff;
			break;
		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			if (!checkonly)
				udp->udp_dontroute = onoff;
			break;
		case SO_USELOOPBACK:
			if (!checkonly)
				udp->udp_useloopback = onoff;
			break;
		case SO_BROADCAST:
			if (!checkonly)
				udp->udp_broadcast = onoff;
			break;

		case SO_SNDBUF:
			/* Requests above the per-stack maximum are refused. */
			if (*i1 > us->us_max_buf) {
				*outlenp = 0;
				return (ENOBUFS);
			}
			if (!checkonly) {
				q->q_hiwat = *i1;
				WR(UDP_RD(q))->q_hiwat = *i1;
			}
			break;
		case SO_RCVBUF:
			/* Requests above the per-stack maximum are refused. */
			if (*i1 > us->us_max_buf) {
				*outlenp = 0;
				return (ENOBUFS);
			}
			if (!checkonly) {
				RD(q)->q_hiwat = *i1;
				UDP_RD(q)->q_hiwat = *i1;
				(void) mi_set_sth_hiwat(UDP_RD(q),
				    udp_set_rcv_hiwat(udp, *i1));
			}
			break;
		case SO_DGRAM_ERRIND:
			if (!checkonly)
				udp->udp_dgram_errind = onoff;
			break;
		case SO_RECVUCRED:
			if (!checkonly)
				udp->udp_recvucred = onoff;
			break;
		case SO_ALLZONES:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Do not modify *outlenp.
			 */
			return (-EINVAL);
		case SO_TIMESTAMP:
			if (!checkonly)
				udp->udp_timestamp = onoff;
			break;
		case SO_ANON_MLP:
			if (!checkonly)
				udp->udp_anon_mlp = onoff;
			break;
		case SO_MAC_EXEMPT:
			/*
			 * Requires the secpolicy_net_mac_aware() check to
			 * pass and the endpoint to still be in TS_UNBND.
			 */
			if (secpolicy_net_mac_aware(cr) != 0 ||
			    udp->udp_state != TS_UNBND)
				return (EACCES);
			if (!checkonly)
				udp->udp_mac_exempt = onoff;
			break;
		case SCM_UCRED: {
			struct ucred_s *ucr;
			/* NOTE: this local 'cr' shadows the 'cr' parameter. */
			cred_t *cr, *newcr;
			ts_label_t *tsl;

			/*
			 * Only sockets that have proper privileges and are
			 * bound to MLPs will have any other value here, so
			 * this implicitly tests for privilege to set label.
			 */
			if (connp->conn_mlp_type == mlptSingle)
				break;
			ucr = (struct ucred_s *)invalp;
			/*
			 * The buffer must be exactly ucredsize bytes and the
			 * label offset must lie past the fixed header while
			 * leaving room for a bslabel_t inside the buffer.
			 */
			if (inlen != ucredsize ||
			    ucr->uc_labeloff < sizeof (*ucr) ||
			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
				return (EINVAL);
			if (!checkonly) {
				mblk_t *mb;

				/* Needs a per-datagram message to attach to. */
				if (attrs == NULL ||
				    (mb = attrs->udpattr_mb) == NULL)
					return (EINVAL);
				/* Fall back to the connection's credential. */
				if ((cr = DB_CRED(mb)) == NULL)
					cr = udp->udp_connp->conn_cred;
				ASSERT(cr != NULL);
				if ((tsl = crgetlabel(cr)) == NULL)
					return (EINVAL);
				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
				    tsl->tsl_doi, KM_NOSLEEP);
				if (newcr == NULL)
					return (ENOSR);
				mblk_setcred(mb, newcr);
				attrs->udpattr_credset = B_TRUE;
				crfree(newcr);
			}
			break;
		}
		case SO_EXCLBIND:
			if (!checkonly)
				udp->udp_exclbind = onoff;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	case IPPROTO_IP:
		/* IPv4-level options are only accepted on AF_INET endpoints. */
		if (udp->udp_family != AF_INET) {
			*outlenp = 0;
			return (ENOPROTOOPT);
		}
		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/* Save options for use by IP. */
			newlen = inlen + udp->udp_label_len;
			/*
			 * The length must be a multiple of four and, with
			 * udp_label_len added, fit in IP_MAX_OPT_LENGTH.
			 */
			if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (checkonly)
				break;

			if (!tsol_option_set(&udp->udp_ip_snd_options,
			    &udp->udp_ip_snd_options_len,
			    udp->udp_label_len, invalp, inlen)) {
				*outlenp = 0;
				return (ENOMEM);
			}

			/* Header size changed; update the write offset. */
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			(void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len +
			    us->us_wroff_extra);
			break;

		case IP_TTL:
			if (!checkonly) {
				udp->udp_ttl = (uchar_t)*i1;
			}
			break;
		case IP_TOS:
		case T_IP_TOS:
			if (!checkonly) {
				udp->udp_type_of_service = (uchar_t)*i1;
			}
			break;
		case IP_MULTICAST_IF: {
			/*
			 * TODO should check OPTMGMT reply and undo this if
			 * there is an error.
			 */
			struct in_addr *inap = (struct in_addr *)invalp;
			if (!checkonly) {
				udp->udp_multicast_if_addr =
				    inap->s_addr;
			}
			break;
		}
		case IP_MULTICAST_TTL:
			/* Only the first byte of the value is used. */
			if (!checkonly)
				udp->udp_multicast_ttl = *invalp;
			break;
		case IP_MULTICAST_LOOP:
			/* Only the first byte of the value is used. */
			if (!checkonly)
				connp->conn_multicast_loop = *invalp;
			break;
		case IP_RECVOPTS:
			if (!checkonly)
				udp->udp_recvopts = onoff;
			break;
		case IP_RECVDSTADDR:
			if (!checkonly)
				udp->udp_recvdstaddr = onoff;
			break;
		case IP_RECVIF:
			if (!checkonly)
				udp->udp_recvif = onoff;
			break;
		case IP_RECVSLLA:
			if (!checkonly)
				udp->udp_recvslla = onoff;
			break;
		case IP_RECVTTL:
			if (!checkonly)
				udp->udp_recvttl = onoff;
			break;
		case IP_PKTINFO: {
			/*
			 * This also handles IP_RECVPKTINFO.
			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
			 * Differentiation is based on the size of the
			 * argument passed in.
			 */
			struct in_pktinfo *pktinfop;
			ip4_pkt_t *attr_pktinfop;

			/* Note: a check-only request skips all validation. */
			if (checkonly)
				break;

			if (inlen == sizeof (int)) {
				/*
				 * This is IP_RECVPKTINFO option.
				 * Keep a local copy of whether this option is
				 * set or not and pass it down to IP for
				 * processing.
				 */

				udp->udp_ip_recvpktinfo = onoff;
				return (-EINVAL);
			}

			if (attrs == NULL ||
			    (attr_pktinfop = attrs->udpattr_ipp4) == NULL) {
				/*
				 * sticky option or no buffer to return
				 * the results.
				 */
				return (EINVAL);
			}

			if (inlen != sizeof (struct in_pktinfo))
				return (EINVAL);

			pktinfop = (struct in_pktinfo *)invalp;

			/*
			 * At least one of the values should be specified
			 */
			if (pktinfop->ipi_ifindex == 0 &&
			    pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
				return (EINVAL);
			}

			/* Record the request in the per-datagram attributes. */
			attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
			attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;

			break;
		}
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case IP_SEC_OPT:
		case IP_NEXTHOP:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Do not modify *outlenp.
			 */
			return (-EINVAL);
		case IP_BOUND_IF:
			if (!checkonly)
				udp->udp_bound_if = *i1;
			break;
		case IP_UNSPEC_SRC:
			if (!checkonly)
				udp->udp_unspec_source = onoff;
			break;
		case IP_XMIT_IF:
			if (!checkonly)
				udp->udp_xmit_if = *i1;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	case IPPROTO_IPV6: {
		ip6_pkt_t		*ipp;
		boolean_t		sticky;

		/* IPv6-level options are only accepted on AF_INET6 endpoints. */
		if (udp->udp_family != AF_INET6) {
			*outlenp = 0;
			return (ENOPROTOOPT);
		}
		/*
		 * Deal with both sticky options and ancillary data
		 */
		sticky = B_FALSE;
		if (attrs == NULL || (ipp = attrs->udpattr_ipp6) ==
		    NULL) {
			/* sticky options, or none */
			ipp = &udp->udp_sticky_ipp;
			sticky = B_TRUE;
		}

		switch (name) {
		case IPV6_MULTICAST_IF:
			if (!checkonly)
				udp->udp_multicast_if_index = *i1;
			break;
		case IPV6_UNICAST_HOPS:
			/* -1 means use default */
			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				if (*i1 == -1) {
					udp->udp_ttl = ipp->ipp_unicast_hops =
					    us->us_ipv6_hoplimit;
					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
					/* Pass modified value to IP. */
					*i1 = udp->udp_ttl;
				} else {
					udp->udp_ttl = ipp->ipp_unicast_hops =
					    (uint8_t)*i1;
					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
				}
				/* Rebuild the header template */
				error = udp_build_hdrs(q, udp);
				if (error != 0) {
					*outlenp = 0;
					return (error);
				}
			}
			break;
		case IPV6_MULTICAST_HOPS:
			/* -1 means use default */
			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				if (*i1 == -1) {
					udp->udp_multicast_ttl =
					    ipp->ipp_multicast_hops =
					    IP_DEFAULT_MULTICAST_TTL;
					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
					/* Pass modified value to IP. */
					*i1 = udp->udp_multicast_ttl;
				} else {
					udp->udp_multicast_ttl =
					    ipp->ipp_multicast_hops =
					    (uint8_t)*i1;
					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
				}
			}
			break;
		case IPV6_MULTICAST_LOOP:
			/* Only 0 and 1 are accepted. */
			if (*i1 != 0 && *i1 != 1) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly)
				connp->conn_multicast_loop = *i1;
			break;
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Note: Do not modify *outlenp
			 */
			return (-EINVAL);
		case IPV6_BOUND_IF:
			if (!checkonly)
				udp->udp_bound_if = *i1;
			break;
		case IPV6_UNSPEC_SRC:
			if (!checkonly)
				udp->udp_unspec_source = onoff;
			break;
		/*
		 * Set boolean switches for ancillary data delivery
		 */
		case IPV6_RECVPKTINFO:
			if (!checkonly)
				udp->udp_ip_recvpktinfo = onoff;
			break;
		case IPV6_RECVTCLASS:
			if (!checkonly) {
				udp->udp_ipv6_recvtclass = onoff;
			}
			break;
		case IPV6_RECVPATHMTU:
			if (!checkonly) {
				udp->udp_ipv6_recvpathmtu = onoff;
			}
			break;
		case IPV6_RECVHOPLIMIT:
			if (!checkonly)
				udp->udp_ipv6_recvhoplimit = onoff;
			break;
		case IPV6_RECVHOPOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvhopopts = onoff;
			break;
		case IPV6_RECVDSTOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvdstopts = onoff;
			break;
		case _OLD_IPV6_RECVDSTOPTS:
			if (!checkonly)
				udp->udp_old_ipv6_recvdstopts = onoff;
			break;
		case IPV6_RECVRTHDRDSTOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvrthdrdstopts = onoff;
			break;
		case IPV6_RECVRTHDR:
			if (!checkonly)
				udp->udp_ipv6_recvrthdr = onoff;
			break;
		/*
		 * Set sticky options or ancillary data.
		 * If sticky options, (re)build any extension headers
		 * that might be needed as a result.
		 */
		case IPV6_PKTINFO:
			/*
			 * The source address and ifindex are verified
			 * in ip_opt_set(). For ancillary data the
			 * source address is checked in ip_wput_v6.
			 */
			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
				return (EINVAL);
			if (checkonly)
				break;

			/* A zero-length value clears the option. */
			if (inlen == 0) {
				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
				ipp->ipp_sticky_ignored |=
				    (IPPF_IFINDEX|IPPF_ADDR);
			} else {
				struct in6_pktinfo *pkti;

				pkti = (struct in6_pktinfo *)invalp;
				ipp->ipp_ifindex = pkti->ipi6_ifindex;
				ipp->ipp_addr = pkti->ipi6_addr;
				/* A zero index or unspecified address is "unset". */
				if (ipp->ipp_ifindex != 0)
					ipp->ipp_fields |= IPPF_IFINDEX;
				else
					ipp->ipp_fields &= ~IPPF_IFINDEX;
				if (!IN6_IS_ADDR_UNSPECIFIED(
				    &ipp->ipp_addr))
					ipp->ipp_fields |= IPPF_ADDR;
				else
					ipp->ipp_fields &= ~IPPF_ADDR;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_HOPLIMIT:
			/* Rejected as a sticky option. */
			if (sticky)
				return (EINVAL);
			if (inlen != 0 && inlen != sizeof (int))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
			} else {
				/* -1 selects the per-stack default hop limit. */
				if (*i1 > 255 || *i1 < -1)
					return (EINVAL);
				if (*i1 == -1)
					ipp->ipp_hoplimit =
					    us->us_ipv6_hoplimit;
				else
					ipp->ipp_hoplimit = *i1;
				ipp->ipp_fields |= IPPF_HOPLIMIT;
			}
			break;
		case IPV6_TCLASS:
			if (inlen != 0 && inlen != sizeof (int))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_TCLASS;
				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
			} else {
				/* -1 is stored as traffic class 0. */
				if (*i1 > 255 || *i1 < -1)
					return (EINVAL);
				if (*i1 == -1)
					ipp->ipp_tclass = 0;
				else
					ipp->ipp_tclass = *i1;
				ipp->ipp_fields |= IPPF_TCLASS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_NEXTHOP:
			/*
			 * IP will verify that the nexthop is reachable
			 * and fail for sticky options.
			 */
			if (inlen != 0 && inlen != sizeof (sin6_t))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_NEXTHOP;
				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
			} else {
				sin6_t *sin6 = (sin6_t *)invalp;

				if (sin6->sin6_family != AF_INET6)
					return (EAFNOSUPPORT);
				if (IN6_IS_ADDR_V4MAPPED(
				    &sin6->sin6_addr))
					return (EADDRNOTAVAIL);
				ipp->ipp_nexthop = sin6->sin6_addr;
				/* An unspecified address clears the next hop. */
				if (!IN6_IS_ADDR_UNSPECIFIED(
				    &ipp->ipp_nexthop))
					ipp->ipp_fields |= IPPF_NEXTHOP;
				else
					ipp->ipp_fields &= ~IPPF_NEXTHOP;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_HOPOPTS: {
			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (hopts->ip6h_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			/*
			 * udp_label_len_v6 is passed only for the sticky
			 * case; ancillary data passes 0.
			 */
			error = optcom_pkt_set(invalp, inlen, sticky,
			    (uchar_t **)&ipp->ipp_hopopts,
			    &ipp->ipp_hopoptslen,
			    sticky ? udp->udp_label_len_v6 : 0);
			if (error != 0)
				return (error);
			if (ipp->ipp_hopoptslen == 0) {
				ipp->ipp_fields &= ~IPPF_HOPOPTS;
				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
			} else {
				ipp->ipp_fields |= IPPF_HOPOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_RTHDRDSTOPTS: {
			ip6_dest_t *dopts = (ip6_dest_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (dopts->ip6d_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Only the sticky copy is freed here. */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
					kmem_free(ipp->ipp_rtdstopts,
					    ipp->ipp_rtdstoptslen);
					ipp->ipp_rtdstopts = NULL;
					ipp->ipp_rtdstoptslen = 0;
				}

				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_rtdstopts,
				    &ipp->ipp_rtdstoptslen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_DSTOPTS: {
			ip6_dest_t *dopts = (ip6_dest_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (dopts->ip6d_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Only the sticky copy is freed here. */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
					kmem_free(ipp->ipp_dstopts,
					    ipp->ipp_dstoptslen);
					ipp->ipp_dstopts = NULL;
					ipp->ipp_dstoptslen = 0;
				}
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_dstopts,
				    &ipp->ipp_dstoptslen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_DSTOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_RTHDR: {
			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (rt->ip6r_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Only the sticky copy is freed here. */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
					kmem_free(ipp->ipp_rthdr,
					    ipp->ipp_rthdrlen);
					ipp->ipp_rthdr = NULL;
					ipp->ipp_rthdrlen = 0;
				}
				ipp->ipp_fields &= ~IPPF_RTHDR;
				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_rthdr,
				    &ipp->ipp_rthdrlen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_RTHDR;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}

		case IPV6_DONTFRAG:
			if (checkonly)
				break;

			if (onoff) {
				ipp->ipp_fields |= IPPF_DONTFRAG;
			} else {
				ipp->ipp_fields &= ~IPPF_DONTFRAG;
			}
			break;

		case IPV6_USE_MIN_MTU:
			if (inlen != sizeof (int))
				return (EINVAL);

			/* Only -1, 0 and 1 are accepted. */
			if (*i1 < -1 || *i1 > 1)
				return (EINVAL);

			if (checkonly)
				break;

			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
			ipp->ipp_use_min_mtu = *i1;
			break;

		case IPV6_BOUND_PIF:
		case IPV6_SEC_OPT:
		case IPV6_DONTFAILOVER_IF:
		case IPV6_SRC_PREFERENCES:
		case IPV6_V6ONLY:
			/* Handled at the IP level */
			return (-EINVAL);
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
		}		/* end IPPROTO_IPV6 */
	case IPPROTO_UDP:
		switch (name) {
		case UDP_ANONPRIVBIND:
			/* Gated by the secpolicy_net_privaddr() check. */
			if ((error = secpolicy_net_privaddr(cr, 0)) != 0) {
				*outlenp = 0;
				return (error);
			}
			if (!checkonly) {
				udp->udp_anon_priv_bind = onoff;
			}
			break;
		case UDP_EXCLBIND:
			if (!checkonly)
				udp->udp_exclbind = onoff;
			break;
		case UDP_RCVHDR:
			if (!checkonly)
				udp->udp_rcvhdr = onoff;
			break;
		case UDP_NAT_T_ENDPOINT:
			/* Gated by the secpolicy_ip_config() check. */
			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
				*outlenp = 0;
				return (error);
			}

			/*
			 * Use udp_family instead so we can avoid ambiguitites
			 * with AF_INET6 sockets that may switch from IPv4
			 * to IPv6.
			 */
			if (udp->udp_family != AF_INET) {
				*outlenp = 0;
				return (EAFNOSUPPORT);
			}

			if (!checkonly) {
				udp->udp_nat_t_endpoint = onoff;

				udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
				    UDPH_SIZE + udp->udp_ip_snd_options_len;

				/* Also, adjust wroff */
				if (onoff) {
					/* Reserve four more header bytes. */
					udp->udp_max_hdr_len +=
					    sizeof (uint32_t);
				}
				(void) mi_set_sth_wroff(RD(q),
				    udp->udp_max_hdr_len + us->us_wroff_extra);
			}
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		*outlenp = 0;
		return (EINVAL);
	}
	/*
	 * Common case of OK return with outval same as inval.
	 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;
	return (0);
}

/*
 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
 * The headers include ip6i_t (if needed), ip6_t, any sticky extension
 * headers, and the udp header.
 * Returns failure if can't allocate memory.
 *
 * Returns 0 on success and ENOMEM when the new template buffer cannot be
 * allocated; in the failure case the existing template is left untouched.
 */
static int
udp_build_hdrs(queue_t *q, udp_t *udp)
{
	udp_stack_t *us = udp->udp_us;
	uchar_t	*hdrs;
	uint_t	hdrs_len;
	ip6_t	*ip6h;
	ip6i_t	*ip6i;
	udpha_t	*udpha;
	ip6_pkt_t *ipp = &udp->udp_sticky_ipp;

	hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
	ASSERT(hdrs_len != 0);
	if (hdrs_len != udp->udp_sticky_hdrs_len) {
		/* Need to reallocate */
		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
		if (hdrs == NULL)
			return (ENOMEM);

		/* Release the previous template only once the new one exists. */
		if (udp->udp_sticky_hdrs_len != 0) {
			kmem_free(udp->udp_sticky_hdrs,
			    udp->udp_sticky_hdrs_len);
		}
		udp->udp_sticky_hdrs = hdrs;
		udp->udp_sticky_hdrs_len = hdrs_len;
	}
	/* The IP layer fills in everything but the trailing UDP header. */
	ip_build_hdrs_v6(udp->udp_sticky_hdrs,
	    udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP);

	/* Set header fields not in ipp */
	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
		/* The ip6_t immediately follows the leading ip6i_t. */
		ip6i = (ip6i_t *)udp->udp_sticky_hdrs;
		ip6h = (ip6_t *)&ip6i[1];
	} else {
		ip6h = (ip6_t *)udp->udp_sticky_hdrs;
	}

	/* Without an explicit sticky address, use the endpoint's source. */
	if (!(ipp->ipp_fields & IPPF_ADDR))
		ip6h->ip6_src = udp->udp_v6src;

	/* The UDP header occupies the last UDPH_SIZE bytes of the template. */
	udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE);
	udpha->uha_src_port = udp->udp_port;

	/* Try to get everything in a single mblk */
	if (hdrs_len > udp->udp_max_hdr_len) {
		udp->udp_max_hdr_len = hdrs_len;
		(void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len +
		    us->us_wroff_extra);
	}
	return (0);
}

/*
 * This routine retrieves the value
of an ND variable in a udpparam_t 4328 * structure. It is called through nd_getset when a user reads the 4329 * variable. 4330 */ 4331 /* ARGSUSED */ 4332 static int 4333 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4334 { 4335 udpparam_t *udppa = (udpparam_t *)cp; 4336 4337 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4338 return (0); 4339 } 4340 4341 /* 4342 * Walk through the param array specified registering each element with the 4343 * named dispatch (ND) handler. 4344 */ 4345 static boolean_t 4346 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 4347 { 4348 for (; cnt-- > 0; udppa++) { 4349 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4350 if (!nd_load(ndp, udppa->udp_param_name, 4351 udp_param_get, udp_param_set, 4352 (caddr_t)udppa)) { 4353 nd_free(ndp); 4354 return (B_FALSE); 4355 } 4356 } 4357 } 4358 if (!nd_load(ndp, "udp_extra_priv_ports", 4359 udp_extra_priv_ports_get, NULL, NULL)) { 4360 nd_free(ndp); 4361 return (B_FALSE); 4362 } 4363 if (!nd_load(ndp, "udp_extra_priv_ports_add", 4364 NULL, udp_extra_priv_ports_add, NULL)) { 4365 nd_free(ndp); 4366 return (B_FALSE); 4367 } 4368 if (!nd_load(ndp, "udp_extra_priv_ports_del", 4369 NULL, udp_extra_priv_ports_del, NULL)) { 4370 nd_free(ndp); 4371 return (B_FALSE); 4372 } 4373 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 4374 NULL)) { 4375 nd_free(ndp); 4376 return (B_FALSE); 4377 } 4378 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 4379 NULL)) { 4380 nd_free(ndp); 4381 return (B_FALSE); 4382 } 4383 return (B_TRUE); 4384 } 4385 4386 /* This routine sets an ND variable in a udpparam_t structure. */ 4387 /* ARGSUSED */ 4388 static int 4389 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 4390 { 4391 long new_value; 4392 udpparam_t *udppa = (udpparam_t *)cp; 4393 4394 /* 4395 * Fail the request if the new value does not lie within the 4396 * required bounds. 
4397 */ 4398 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 4399 new_value < udppa->udp_param_min || 4400 new_value > udppa->udp_param_max) { 4401 return (EINVAL); 4402 } 4403 4404 /* Set the new value */ 4405 udppa->udp_param_value = new_value; 4406 return (0); 4407 } 4408 4409 /* 4410 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 4411 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 4412 * just count the length needed for allocation. If 'dbuf' is non-NULL, 4413 * then it's assumed to be allocated to be large enough. 4414 * 4415 * Returns zero if trimming of the security option causes all options to go 4416 * away. 4417 */ 4418 static size_t 4419 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 4420 { 4421 struct T_opthdr *toh; 4422 size_t hol = ipp->ipp_hopoptslen; 4423 ip6_hbh_t *dstopt = NULL; 4424 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 4425 size_t tlen, olen, plen; 4426 boolean_t deleting; 4427 const struct ip6_opt *sopt, *lastpad; 4428 struct ip6_opt *dopt; 4429 4430 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 4431 toh->level = IPPROTO_IPV6; 4432 toh->name = IPV6_HOPOPTS; 4433 toh->status = 0; 4434 dstopt = (ip6_hbh_t *)(toh + 1); 4435 } 4436 4437 /* 4438 * If labeling is enabled, then skip the label option 4439 * but get other options if there are any. 4440 */ 4441 if (is_system_labeled()) { 4442 dopt = NULL; 4443 if (dstopt != NULL) { 4444 /* will fill in ip6h_len later */ 4445 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 4446 dopt = (struct ip6_opt *)(dstopt + 1); 4447 } 4448 sopt = (const struct ip6_opt *)(srcopt + 1); 4449 hol -= sizeof (*srcopt); 4450 tlen = sizeof (*dstopt); 4451 lastpad = NULL; 4452 deleting = B_FALSE; 4453 /* 4454 * This loop finds the first (lastpad pointer) of any number of 4455 * pads that preceeds the security option, then treats the 4456 * security option as though it were a pad, and then finds the 4457 * next non-pad option (or end of list). 
4458 * 4459 * It then treats the entire block as one big pad. To preserve 4460 * alignment of any options that follow, or just the end of the 4461 * list, it computes a minimal new padding size that keeps the 4462 * same alignment for the next option. 4463 * 4464 * If it encounters just a sequence of pads with no security 4465 * option, those are copied as-is rather than collapsed. 4466 * 4467 * Note that to handle the end of list case, the code makes one 4468 * loop with 'hol' set to zero. 4469 */ 4470 for (;;) { 4471 if (hol > 0) { 4472 if (sopt->ip6o_type == IP6OPT_PAD1) { 4473 if (lastpad == NULL) 4474 lastpad = sopt; 4475 sopt = (const struct ip6_opt *) 4476 &sopt->ip6o_len; 4477 hol--; 4478 continue; 4479 } 4480 olen = sopt->ip6o_len + sizeof (*sopt); 4481 if (olen > hol) 4482 olen = hol; 4483 if (sopt->ip6o_type == IP6OPT_PADN || 4484 sopt->ip6o_type == ip6opt_ls) { 4485 if (sopt->ip6o_type == ip6opt_ls) 4486 deleting = B_TRUE; 4487 if (lastpad == NULL) 4488 lastpad = sopt; 4489 sopt = (const struct ip6_opt *) 4490 ((const char *)sopt + olen); 4491 hol -= olen; 4492 continue; 4493 } 4494 } else { 4495 /* if nothing was copied at all, then delete */ 4496 if (tlen == sizeof (*dstopt)) 4497 return (0); 4498 /* last pass; pick up any trailing padding */ 4499 olen = 0; 4500 } 4501 if (deleting) { 4502 /* 4503 * compute aligning effect of deleted material 4504 * to reproduce with pad. 
4505 */ 4506 plen = ((const char *)sopt - 4507 (const char *)lastpad) & 7; 4508 tlen += plen; 4509 if (dopt != NULL) { 4510 if (plen == 1) { 4511 dopt->ip6o_type = IP6OPT_PAD1; 4512 } else if (plen > 1) { 4513 plen -= sizeof (*dopt); 4514 dopt->ip6o_type = IP6OPT_PADN; 4515 dopt->ip6o_len = plen; 4516 if (plen > 0) 4517 bzero(dopt + 1, plen); 4518 } 4519 dopt = (struct ip6_opt *) 4520 ((char *)dopt + plen); 4521 } 4522 deleting = B_FALSE; 4523 lastpad = NULL; 4524 } 4525 /* if there's uncopied padding, then copy that now */ 4526 if (lastpad != NULL) { 4527 olen += (const char *)sopt - 4528 (const char *)lastpad; 4529 sopt = lastpad; 4530 lastpad = NULL; 4531 } 4532 if (dopt != NULL && olen > 0) { 4533 bcopy(sopt, dopt, olen); 4534 dopt = (struct ip6_opt *)((char *)dopt + olen); 4535 } 4536 if (hol == 0) 4537 break; 4538 tlen += olen; 4539 sopt = (const struct ip6_opt *) 4540 ((const char *)sopt + olen); 4541 hol -= olen; 4542 } 4543 /* go back and patch up the length value, rounded upward */ 4544 if (dstopt != NULL) 4545 dstopt->ip6h_len = (tlen - 1) >> 3; 4546 } else { 4547 tlen = hol; 4548 if (dstopt != NULL) 4549 bcopy(srcopt, dstopt, hol); 4550 } 4551 4552 tlen += sizeof (*toh); 4553 if (toh != NULL) 4554 toh->len = tlen; 4555 4556 return (tlen); 4557 } 4558 4559 static void 4560 udp_input(conn_t *connp, mblk_t *mp) 4561 { 4562 struct T_unitdata_ind *tudi; 4563 uchar_t *rptr; /* Pointer to IP header */ 4564 int hdr_length; /* Length of IP+UDP headers */ 4565 int udi_size; /* Size of T_unitdata_ind */ 4566 int mp_len; 4567 udp_t *udp; 4568 udpha_t *udpha; 4569 int ipversion; 4570 ip6_pkt_t ipp; 4571 ip6_t *ip6h; 4572 ip6i_t *ip6i; 4573 mblk_t *mp1; 4574 mblk_t *options_mp = NULL; 4575 ip_pktinfo_t *pinfo = NULL; 4576 cred_t *cr = NULL; 4577 queue_t *q = connp->conn_rq; 4578 pid_t cpid; 4579 cred_t *rcr = connp->conn_cred; 4580 udp_stack_t *us; 4581 4582 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4583 "udp_rput_start: q %p mp %p", q, mp); 4584 4585 udp = 
connp->conn_udp; 4586 us = udp->udp_us; 4587 rptr = mp->b_rptr; 4588 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4589 ASSERT(OK_32PTR(rptr)); 4590 4591 /* 4592 * IP should have prepended the options data in an M_CTL 4593 * Check M_CTL "type" to make sure are not here bcos of 4594 * a valid ICMP message 4595 */ 4596 if (DB_TYPE(mp) == M_CTL) { 4597 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 4598 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 4599 IN_PKTINFO) { 4600 /* 4601 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 4602 * has been appended to the packet by IP. We need to 4603 * extract the mblk and adjust the rptr 4604 */ 4605 pinfo = (ip_pktinfo_t *)mp->b_rptr; 4606 options_mp = mp; 4607 mp = mp->b_cont; 4608 rptr = mp->b_rptr; 4609 UDP_STAT(us, udp_in_pktinfo); 4610 } else { 4611 /* 4612 * ICMP messages. 4613 */ 4614 udp_icmp_error(q, mp); 4615 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4616 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4617 return; 4618 } 4619 } 4620 4621 mp_len = msgdsize(mp); 4622 /* 4623 * This is the inbound data path. 4624 * First, we check to make sure the IP version number is correct, 4625 * and then pull the IP and UDP headers into the first mblk. 4626 * Assume IP provides aligned packets - otherwise toss. 4627 * Also, check if we have a complete IP header. 4628 */ 4629 4630 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4631 ipp.ipp_fields = 0; 4632 4633 ipversion = IPH_HDR_VERSION(rptr); 4634 switch (ipversion) { 4635 case IPV4_VERSION: 4636 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4637 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4638 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4639 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4640 (udp->udp_ip_rcv_options_len)) { 4641 /* 4642 * Handle IPv4 packets with options outside of the 4643 * main data path. Not needed for AF_INET6 sockets 4644 * since they don't support a getsockopt of IP_OPTIONS. 
4645 */ 4646 if (udp->udp_family == AF_INET6) 4647 break; 4648 /* 4649 * UDP length check performed for IPv4 packets with 4650 * options to check whether UDP length specified in 4651 * the header is the same as the physical length of 4652 * the packet. 4653 */ 4654 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4655 if (mp_len != (ntohs(udpha->uha_length) + 4656 hdr_length - UDPH_SIZE)) { 4657 goto tossit; 4658 } 4659 /* 4660 * Handle the case where the packet has IP options 4661 * and the IP_RECVSLLA & IP_RECVIF are set 4662 */ 4663 if (pinfo != NULL) 4664 mp = options_mp; 4665 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4666 SQTAG_UDP_INPUT); 4667 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4668 "udp_rput_end: q %p (%S)", q, "end"); 4669 return; 4670 } 4671 4672 /* Handle IPV6_RECVHOPLIMIT. */ 4673 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4674 udp->udp_ip_recvpktinfo) { 4675 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 4676 ipp.ipp_fields |= IPPF_IFINDEX; 4677 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 4678 } 4679 } 4680 break; 4681 case IPV6_VERSION: 4682 /* 4683 * IPv6 packets can only be received by applications 4684 * that are prepared to receive IPv6 addresses. 4685 * The IP fanout must ensure this. 
4686 */ 4687 ASSERT(udp->udp_family == AF_INET6); 4688 4689 ip6h = (ip6_t *)rptr; 4690 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4691 4692 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4693 uint8_t nexthdrp; 4694 /* Look for ifindex information */ 4695 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4696 ip6i = (ip6i_t *)ip6h; 4697 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4698 goto tossit; 4699 4700 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4701 ASSERT(ip6i->ip6i_ifindex != 0); 4702 ipp.ipp_fields |= IPPF_IFINDEX; 4703 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4704 } 4705 rptr = (uchar_t *)&ip6i[1]; 4706 mp->b_rptr = rptr; 4707 if (rptr == mp->b_wptr) { 4708 mp1 = mp->b_cont; 4709 freeb(mp); 4710 mp = mp1; 4711 rptr = mp->b_rptr; 4712 } 4713 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4714 goto tossit; 4715 ip6h = (ip6_t *)rptr; 4716 mp_len = msgdsize(mp); 4717 } 4718 /* 4719 * Find any potentially interesting extension headers 4720 * as well as the length of the IPv6 + extension 4721 * headers. 4722 */ 4723 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4724 UDPH_SIZE; 4725 ASSERT(nexthdrp == IPPROTO_UDP); 4726 } else { 4727 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4728 ip6i = NULL; 4729 } 4730 break; 4731 default: 4732 ASSERT(0); 4733 } 4734 4735 /* 4736 * IP inspected the UDP header thus all of it must be in the mblk. 4737 * UDP length check is performed for IPv6 packets and IPv4 packets 4738 * without options to check if the size of the packet as specified 4739 * by the header is the same as the physical size of the packet. 4740 */ 4741 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4742 if ((MBLKL(mp) < hdr_length) || 4743 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4744 goto tossit; 4745 } 4746 4747 /* Walk past the headers. */ 4748 if (!udp->udp_rcvhdr) { 4749 mp->b_rptr = rptr + hdr_length; 4750 mp_len -= hdr_length; 4751 } 4752 4753 /* 4754 * This is the inbound data path. 
Packets are passed upstream as 4755 * T_UNITDATA_IND messages with full IP headers still attached. 4756 */ 4757 if (udp->udp_family == AF_INET) { 4758 sin_t *sin; 4759 4760 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4761 4762 /* 4763 * Normally only send up the address. 4764 * If IP_RECVDSTADDR is set we include the destination IP 4765 * address as an option. With IP_RECVOPTS we include all 4766 * the IP options. Only ip_rput_other() handles packets 4767 * that contain IP options. 4768 */ 4769 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4770 if (udp->udp_recvdstaddr) { 4771 udi_size += sizeof (struct T_opthdr) + 4772 sizeof (struct in_addr); 4773 UDP_STAT(us, udp_in_recvdstaddr); 4774 } 4775 4776 if (udp->udp_ip_recvpktinfo && (pinfo != NULL) && 4777 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4778 udi_size += sizeof (struct T_opthdr) + 4779 sizeof (struct in_pktinfo); 4780 UDP_STAT(us, udp_ip_recvpktinfo); 4781 } 4782 4783 /* 4784 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4785 * space accordingly 4786 */ 4787 if (udp->udp_recvif && (pinfo != NULL) && 4788 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4789 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4790 UDP_STAT(us, udp_in_recvif); 4791 } 4792 4793 if (udp->udp_recvslla && (pinfo != NULL) && 4794 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4795 udi_size += sizeof (struct T_opthdr) + 4796 sizeof (struct sockaddr_dl); 4797 UDP_STAT(us, udp_in_recvslla); 4798 } 4799 4800 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4801 udi_size += sizeof (struct T_opthdr) + ucredsize; 4802 cpid = DB_CPID(mp); 4803 UDP_STAT(us, udp_in_recvucred); 4804 } 4805 4806 /* 4807 * If SO_TIMESTAMP is set allocate the appropriate sized 4808 * buffer. Since gethrestime() expects a pointer aligned 4809 * argument, we allocate space necessary for extra 4810 * alignment (even though it might not be used). 
4811 */ 4812 if (udp->udp_timestamp) { 4813 udi_size += sizeof (struct T_opthdr) + 4814 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4815 UDP_STAT(us, udp_in_timestamp); 4816 } 4817 4818 /* 4819 * If IP_RECVTTL is set allocate the appropriate sized buffer 4820 */ 4821 if (udp->udp_recvttl) { 4822 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4823 UDP_STAT(us, udp_in_recvttl); 4824 } 4825 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4826 4827 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4828 mp1 = allocb(udi_size, BPRI_MED); 4829 if (mp1 == NULL) { 4830 freemsg(mp); 4831 if (options_mp != NULL) 4832 freeb(options_mp); 4833 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4834 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4835 BUMP_MIB(&udp->udp_mib, udpInErrors); 4836 return; 4837 } 4838 mp1->b_cont = mp; 4839 mp = mp1; 4840 mp->b_datap->db_type = M_PROTO; 4841 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4842 mp->b_wptr = (uchar_t *)tudi + udi_size; 4843 tudi->PRIM_type = T_UNITDATA_IND; 4844 tudi->SRC_length = sizeof (sin_t); 4845 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4846 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4847 sizeof (sin_t); 4848 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4849 tudi->OPT_length = udi_size; 4850 sin = (sin_t *)&tudi[1]; 4851 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4852 sin->sin_port = udpha->uha_src_port; 4853 sin->sin_family = udp->udp_family; 4854 *(uint32_t *)&sin->sin_zero[0] = 0; 4855 *(uint32_t *)&sin->sin_zero[4] = 0; 4856 4857 /* 4858 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4859 * IP_RECVTTL has been set. 4860 */ 4861 if (udi_size != 0) { 4862 /* 4863 * Copy in destination address before options to avoid 4864 * any padding issues. 
4865 */ 4866 char *dstopt; 4867 4868 dstopt = (char *)&sin[1]; 4869 if (udp->udp_recvdstaddr) { 4870 struct T_opthdr *toh; 4871 ipaddr_t *dstptr; 4872 4873 toh = (struct T_opthdr *)dstopt; 4874 toh->level = IPPROTO_IP; 4875 toh->name = IP_RECVDSTADDR; 4876 toh->len = sizeof (struct T_opthdr) + 4877 sizeof (ipaddr_t); 4878 toh->status = 0; 4879 dstopt += sizeof (struct T_opthdr); 4880 dstptr = (ipaddr_t *)dstopt; 4881 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4882 dstopt = (char *)toh + toh->len; 4883 udi_size -= toh->len; 4884 } 4885 4886 if (udp->udp_ip_recvpktinfo && (pinfo != NULL) && 4887 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4888 struct T_opthdr *toh; 4889 struct in_pktinfo *pktinfop; 4890 4891 toh = (struct T_opthdr *)dstopt; 4892 toh->level = IPPROTO_IP; 4893 toh->name = IP_PKTINFO; 4894 toh->len = sizeof (struct T_opthdr) + 4895 sizeof (*pktinfop); 4896 toh->status = 0; 4897 dstopt += sizeof (struct T_opthdr); 4898 pktinfop = (struct in_pktinfo *)dstopt; 4899 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4900 pktinfop->ipi_spec_dst = 4901 pinfo->ip_pkt_match_addr; 4902 pktinfop->ipi_addr.s_addr = 4903 ((ipha_t *)rptr)->ipha_dst; 4904 4905 dstopt += sizeof (struct in_pktinfo); 4906 udi_size -= toh->len; 4907 } 4908 4909 if (udp->udp_recvslla && (pinfo != NULL) && 4910 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4911 4912 struct T_opthdr *toh; 4913 struct sockaddr_dl *dstptr; 4914 4915 toh = (struct T_opthdr *)dstopt; 4916 toh->level = IPPROTO_IP; 4917 toh->name = IP_RECVSLLA; 4918 toh->len = sizeof (struct T_opthdr) + 4919 sizeof (struct sockaddr_dl); 4920 toh->status = 0; 4921 dstopt += sizeof (struct T_opthdr); 4922 dstptr = (struct sockaddr_dl *)dstopt; 4923 bcopy(&pinfo->ip_pkt_slla, dstptr, 4924 sizeof (struct sockaddr_dl)); 4925 dstopt = (char *)toh + toh->len; 4926 udi_size -= toh->len; 4927 } 4928 4929 if (udp->udp_recvif && (pinfo != NULL) && 4930 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4931 4932 struct T_opthdr *toh; 4933 uint_t *dstptr; 4934 4935 toh = 
(struct T_opthdr *)dstopt; 4936 toh->level = IPPROTO_IP; 4937 toh->name = IP_RECVIF; 4938 toh->len = sizeof (struct T_opthdr) + 4939 sizeof (uint_t); 4940 toh->status = 0; 4941 dstopt += sizeof (struct T_opthdr); 4942 dstptr = (uint_t *)dstopt; 4943 *dstptr = pinfo->ip_pkt_ifindex; 4944 dstopt = (char *)toh + toh->len; 4945 udi_size -= toh->len; 4946 } 4947 4948 if (cr != NULL) { 4949 struct T_opthdr *toh; 4950 4951 toh = (struct T_opthdr *)dstopt; 4952 toh->level = SOL_SOCKET; 4953 toh->name = SCM_UCRED; 4954 toh->len = sizeof (struct T_opthdr) + ucredsize; 4955 toh->status = 0; 4956 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4957 dstopt = (char *)toh + toh->len; 4958 udi_size -= toh->len; 4959 } 4960 4961 if (udp->udp_timestamp) { 4962 struct T_opthdr *toh; 4963 4964 toh = (struct T_opthdr *)dstopt; 4965 toh->level = SOL_SOCKET; 4966 toh->name = SCM_TIMESTAMP; 4967 toh->len = sizeof (struct T_opthdr) + 4968 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4969 toh->status = 0; 4970 dstopt += sizeof (struct T_opthdr); 4971 /* Align for gethrestime() */ 4972 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4973 sizeof (intptr_t)); 4974 gethrestime((timestruc_t *)dstopt); 4975 dstopt = (char *)toh + toh->len; 4976 udi_size -= toh->len; 4977 } 4978 4979 /* 4980 * CAUTION: 4981 * Due to aligment issues 4982 * Processing of IP_RECVTTL option 4983 * should always be the last. Adding 4984 * any option processing after this will 4985 * cause alignment panic. 
4986 */ 4987 if (udp->udp_recvttl) { 4988 struct T_opthdr *toh; 4989 uint8_t *dstptr; 4990 4991 toh = (struct T_opthdr *)dstopt; 4992 toh->level = IPPROTO_IP; 4993 toh->name = IP_RECVTTL; 4994 toh->len = sizeof (struct T_opthdr) + 4995 sizeof (uint8_t); 4996 toh->status = 0; 4997 dstopt += sizeof (struct T_opthdr); 4998 dstptr = (uint8_t *)dstopt; 4999 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5000 dstopt = (char *)toh + toh->len; 5001 udi_size -= toh->len; 5002 } 5003 5004 /* Consumed all of allocated space */ 5005 ASSERT(udi_size == 0); 5006 } 5007 } else { 5008 sin6_t *sin6; 5009 5010 /* 5011 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 5012 * 5013 * Normally we only send up the address. If receiving of any 5014 * optional receive side information is enabled, we also send 5015 * that up as options. 5016 * [ Only udp_rput_other() handles packets that contain IP 5017 * options so code to account for does not appear immediately 5018 * below but elsewhere ] 5019 */ 5020 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 5021 5022 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 5023 IPPF_RTHDR|IPPF_IFINDEX)) { 5024 if (udp->udp_ipv6_recvhopopts && 5025 (ipp.ipp_fields & IPPF_HOPOPTS)) { 5026 size_t hlen; 5027 5028 UDP_STAT(us, udp_in_recvhopopts); 5029 hlen = copy_hop_opts(&ipp, NULL); 5030 if (hlen == 0) 5031 ipp.ipp_fields &= ~IPPF_HOPOPTS; 5032 udi_size += hlen; 5033 } 5034 if ((udp->udp_ipv6_recvdstopts || 5035 udp->udp_old_ipv6_recvdstopts) && 5036 (ipp.ipp_fields & IPPF_DSTOPTS)) { 5037 udi_size += sizeof (struct T_opthdr) + 5038 ipp.ipp_dstoptslen; 5039 UDP_STAT(us, udp_in_recvdstopts); 5040 } 5041 if (((udp->udp_ipv6_recvdstopts && 5042 udp->udp_ipv6_recvrthdr && 5043 (ipp.ipp_fields & IPPF_RTHDR)) || 5044 udp->udp_ipv6_recvrthdrdstopts) && 5045 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 5046 udi_size += sizeof (struct T_opthdr) + 5047 ipp.ipp_rtdstoptslen; 5048 UDP_STAT(us, udp_in_recvrtdstopts); 5049 } 5050 if 
(udp->udp_ipv6_recvrthdr && 5051 (ipp.ipp_fields & IPPF_RTHDR)) { 5052 udi_size += sizeof (struct T_opthdr) + 5053 ipp.ipp_rthdrlen; 5054 UDP_STAT(us, udp_in_recvrthdr); 5055 } 5056 if (udp->udp_ip_recvpktinfo && 5057 (ipp.ipp_fields & IPPF_IFINDEX)) { 5058 udi_size += sizeof (struct T_opthdr) + 5059 sizeof (struct in6_pktinfo); 5060 UDP_STAT(us, udp_in_recvpktinfo); 5061 } 5062 5063 } 5064 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5065 udi_size += sizeof (struct T_opthdr) + ucredsize; 5066 cpid = DB_CPID(mp); 5067 UDP_STAT(us, udp_in_recvucred); 5068 } 5069 5070 if (udp->udp_ipv6_recvhoplimit) { 5071 udi_size += sizeof (struct T_opthdr) + sizeof (int); 5072 UDP_STAT(us, udp_in_recvhoplimit); 5073 } 5074 5075 if (udp->udp_ipv6_recvtclass) { 5076 udi_size += sizeof (struct T_opthdr) + sizeof (int); 5077 UDP_STAT(us, udp_in_recvtclass); 5078 } 5079 5080 mp1 = allocb(udi_size, BPRI_MED); 5081 if (mp1 == NULL) { 5082 freemsg(mp); 5083 if (options_mp != NULL) 5084 freeb(options_mp); 5085 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5086 "udp_rput_end: q %p (%S)", q, "allocbfail"); 5087 BUMP_MIB(&udp->udp_mib, udpInErrors); 5088 return; 5089 } 5090 mp1->b_cont = mp; 5091 mp = mp1; 5092 mp->b_datap->db_type = M_PROTO; 5093 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5094 mp->b_wptr = (uchar_t *)tudi + udi_size; 5095 tudi->PRIM_type = T_UNITDATA_IND; 5096 tudi->SRC_length = sizeof (sin6_t); 5097 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5098 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 5099 sizeof (sin6_t); 5100 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 5101 tudi->OPT_length = udi_size; 5102 sin6 = (sin6_t *)&tudi[1]; 5103 if (ipversion == IPV4_VERSION) { 5104 in6_addr_t v6dst; 5105 5106 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 5107 &sin6->sin6_addr); 5108 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 5109 &v6dst); 5110 sin6->sin6_flowinfo = 0; 5111 sin6->sin6_scope_id = 0; 5112 sin6->__sin6_src_id = 
ip_srcid_find_addr(&v6dst, 5113 connp->conn_zoneid, us->us_netstack); 5114 } else { 5115 sin6->sin6_addr = ip6h->ip6_src; 5116 /* No sin6_flowinfo per API */ 5117 sin6->sin6_flowinfo = 0; 5118 /* For link-scope source pass up scope id */ 5119 if ((ipp.ipp_fields & IPPF_IFINDEX) && 5120 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 5121 sin6->sin6_scope_id = ipp.ipp_ifindex; 5122 else 5123 sin6->sin6_scope_id = 0; 5124 sin6->__sin6_src_id = ip_srcid_find_addr( 5125 &ip6h->ip6_dst, connp->conn_zoneid, 5126 us->us_netstack); 5127 } 5128 sin6->sin6_port = udpha->uha_src_port; 5129 sin6->sin6_family = udp->udp_family; 5130 5131 if (udi_size != 0) { 5132 uchar_t *dstopt; 5133 5134 dstopt = (uchar_t *)&sin6[1]; 5135 if (udp->udp_ip_recvpktinfo && 5136 (ipp.ipp_fields & IPPF_IFINDEX)) { 5137 struct T_opthdr *toh; 5138 struct in6_pktinfo *pkti; 5139 5140 toh = (struct T_opthdr *)dstopt; 5141 toh->level = IPPROTO_IPV6; 5142 toh->name = IPV6_PKTINFO; 5143 toh->len = sizeof (struct T_opthdr) + 5144 sizeof (*pkti); 5145 toh->status = 0; 5146 dstopt += sizeof (struct T_opthdr); 5147 pkti = (struct in6_pktinfo *)dstopt; 5148 if (ipversion == IPV6_VERSION) 5149 pkti->ipi6_addr = ip6h->ip6_dst; 5150 else 5151 IN6_IPADDR_TO_V4MAPPED( 5152 ((ipha_t *)rptr)->ipha_dst, 5153 &pkti->ipi6_addr); 5154 pkti->ipi6_ifindex = ipp.ipp_ifindex; 5155 dstopt += sizeof (*pkti); 5156 udi_size -= toh->len; 5157 } 5158 if (udp->udp_ipv6_recvhoplimit) { 5159 struct T_opthdr *toh; 5160 5161 toh = (struct T_opthdr *)dstopt; 5162 toh->level = IPPROTO_IPV6; 5163 toh->name = IPV6_HOPLIMIT; 5164 toh->len = sizeof (struct T_opthdr) + 5165 sizeof (uint_t); 5166 toh->status = 0; 5167 dstopt += sizeof (struct T_opthdr); 5168 if (ipversion == IPV6_VERSION) 5169 *(uint_t *)dstopt = ip6h->ip6_hops; 5170 else 5171 *(uint_t *)dstopt = 5172 ((ipha_t *)rptr)->ipha_ttl; 5173 dstopt += sizeof (uint_t); 5174 udi_size -= toh->len; 5175 } 5176 if (udp->udp_ipv6_recvtclass) { 5177 struct T_opthdr *toh; 5178 5179 toh = (struct 
T_opthdr *)dstopt; 5180 toh->level = IPPROTO_IPV6; 5181 toh->name = IPV6_TCLASS; 5182 toh->len = sizeof (struct T_opthdr) + 5183 sizeof (uint_t); 5184 toh->status = 0; 5185 dstopt += sizeof (struct T_opthdr); 5186 if (ipversion == IPV6_VERSION) { 5187 *(uint_t *)dstopt = 5188 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 5189 } else { 5190 ipha_t *ipha = (ipha_t *)rptr; 5191 *(uint_t *)dstopt = 5192 ipha->ipha_type_of_service; 5193 } 5194 dstopt += sizeof (uint_t); 5195 udi_size -= toh->len; 5196 } 5197 if (udp->udp_ipv6_recvhopopts && 5198 (ipp.ipp_fields & IPPF_HOPOPTS)) { 5199 size_t hlen; 5200 5201 hlen = copy_hop_opts(&ipp, dstopt); 5202 dstopt += hlen; 5203 udi_size -= hlen; 5204 } 5205 if (udp->udp_ipv6_recvdstopts && 5206 udp->udp_ipv6_recvrthdr && 5207 (ipp.ipp_fields & IPPF_RTHDR) && 5208 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 5209 struct T_opthdr *toh; 5210 5211 toh = (struct T_opthdr *)dstopt; 5212 toh->level = IPPROTO_IPV6; 5213 toh->name = IPV6_DSTOPTS; 5214 toh->len = sizeof (struct T_opthdr) + 5215 ipp.ipp_rtdstoptslen; 5216 toh->status = 0; 5217 dstopt += sizeof (struct T_opthdr); 5218 bcopy(ipp.ipp_rtdstopts, dstopt, 5219 ipp.ipp_rtdstoptslen); 5220 dstopt += ipp.ipp_rtdstoptslen; 5221 udi_size -= toh->len; 5222 } 5223 if (udp->udp_ipv6_recvrthdr && 5224 (ipp.ipp_fields & IPPF_RTHDR)) { 5225 struct T_opthdr *toh; 5226 5227 toh = (struct T_opthdr *)dstopt; 5228 toh->level = IPPROTO_IPV6; 5229 toh->name = IPV6_RTHDR; 5230 toh->len = sizeof (struct T_opthdr) + 5231 ipp.ipp_rthdrlen; 5232 toh->status = 0; 5233 dstopt += sizeof (struct T_opthdr); 5234 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 5235 dstopt += ipp.ipp_rthdrlen; 5236 udi_size -= toh->len; 5237 } 5238 if (udp->udp_ipv6_recvdstopts && 5239 (ipp.ipp_fields & IPPF_DSTOPTS)) { 5240 struct T_opthdr *toh; 5241 5242 toh = (struct T_opthdr *)dstopt; 5243 toh->level = IPPROTO_IPV6; 5244 toh->name = IPV6_DSTOPTS; 5245 toh->len = sizeof (struct T_opthdr) + 5246 ipp.ipp_dstoptslen; 5247 toh->status = 0; 5248 
dstopt += sizeof (struct T_opthdr); 5249 bcopy(ipp.ipp_dstopts, dstopt, 5250 ipp.ipp_dstoptslen); 5251 dstopt += ipp.ipp_dstoptslen; 5252 udi_size -= toh->len; 5253 } 5254 5255 if (cr != NULL) { 5256 struct T_opthdr *toh; 5257 5258 toh = (struct T_opthdr *)dstopt; 5259 toh->level = SOL_SOCKET; 5260 toh->name = SCM_UCRED; 5261 toh->len = sizeof (struct T_opthdr) + ucredsize; 5262 toh->status = 0; 5263 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5264 dstopt += toh->len; 5265 udi_size -= toh->len; 5266 } 5267 /* Consumed all of allocated space */ 5268 ASSERT(udi_size == 0); 5269 } 5270 #undef sin6 5271 /* No IP_RECVDSTADDR for IPv6. */ 5272 } 5273 5274 BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); 5275 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5276 "udp_rput_end: q %p (%S)", q, "end"); 5277 if (options_mp != NULL) 5278 freeb(options_mp); 5279 5280 if (udp->udp_direct_sockfs) { 5281 /* 5282 * There is nothing above us except for the stream head; 5283 * use the read-side synchronous stream interface in 5284 * order to reduce the time spent in interrupt thread. 5285 */ 5286 ASSERT(udp->udp_issocket); 5287 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 5288 } else { 5289 /* 5290 * Use regular STREAMS interface to pass data upstream 5291 * if this is not a socket endpoint, or if we have 5292 * switched over to the slow mode due to sockmod being 5293 * popped or a module being pushed on top of us. 
5294 */ 5295 putnext(UDP_RD(q), mp); 5296 } 5297 return; 5298 5299 tossit: 5300 freemsg(mp); 5301 if (options_mp != NULL) 5302 freeb(options_mp); 5303 BUMP_MIB(&udp->udp_mib, udpInErrors); 5304 } 5305 5306 void 5307 udp_conn_recv(conn_t *connp, mblk_t *mp) 5308 { 5309 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 5310 } 5311 5312 /* ARGSUSED */ 5313 static void 5314 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 5315 { 5316 udp_input((conn_t *)arg, mp); 5317 _UDP_EXIT((conn_t *)arg); 5318 } 5319 5320 /* 5321 * Process non-M_DATA messages as well as M_DATA messages that requires 5322 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 5323 */ 5324 static void 5325 udp_rput_other(queue_t *q, mblk_t *mp) 5326 { 5327 struct T_unitdata_ind *tudi; 5328 mblk_t *mp1; 5329 uchar_t *rptr; 5330 uchar_t *new_rptr; 5331 int hdr_length; 5332 int udi_size; /* Size of T_unitdata_ind */ 5333 int opt_len; /* Length of IP options */ 5334 sin_t *sin; 5335 struct T_error_ack *tea; 5336 mblk_t *options_mp = NULL; 5337 ip_pktinfo_t *pinfo; 5338 boolean_t recv_on = B_FALSE; 5339 cred_t *cr = NULL; 5340 udp_t *udp = Q_TO_UDP(q); 5341 pid_t cpid; 5342 cred_t *rcr = udp->udp_connp->conn_cred; 5343 udp_stack_t *us = udp->udp_us; 5344 5345 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 5346 "udp_rput_other: q %p mp %p", q, mp); 5347 5348 ASSERT(OK_32PTR(mp->b_rptr)); 5349 rptr = mp->b_rptr; 5350 5351 switch (mp->b_datap->db_type) { 5352 case M_CTL: 5353 /* 5354 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 5355 */ 5356 recv_on = B_TRUE; 5357 options_mp = mp; 5358 pinfo = (ip_pktinfo_t *)options_mp->b_rptr; 5359 5360 /* 5361 * The actual data is in mp->b_cont 5362 */ 5363 mp = mp->b_cont; 5364 ASSERT(OK_32PTR(mp->b_rptr)); 5365 rptr = mp->b_rptr; 5366 break; 5367 case M_DATA: 5368 /* 5369 * M_DATA messages contain IPv4 datagrams. They are handled 5370 * after this switch. 
5371 */ 5372 break; 5373 case M_PROTO: 5374 case M_PCPROTO: 5375 /* M_PROTO messages contain some type of TPI message. */ 5376 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 5377 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5378 freemsg(mp); 5379 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5380 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 5381 return; 5382 } 5383 tea = (struct T_error_ack *)rptr; 5384 5385 switch (tea->PRIM_type) { 5386 case T_ERROR_ACK: 5387 switch (tea->ERROR_prim) { 5388 case O_T_BIND_REQ: 5389 case T_BIND_REQ: { 5390 /* 5391 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5392 * clear out the associated port and source 5393 * address before passing the message 5394 * upstream. If this was caused by a T_CONN_REQ 5395 * revert back to bound state. 5396 */ 5397 udp_fanout_t *udpf; 5398 5399 udpf = &us->us_bind_fanout[UDP_BIND_HASH( 5400 udp->udp_port, us->us_bind_fanout_size)]; 5401 mutex_enter(&udpf->uf_lock); 5402 if (udp->udp_state == TS_DATA_XFER) { 5403 /* Connect failed */ 5404 tea->ERROR_prim = T_CONN_REQ; 5405 /* Revert back to the bound source */ 5406 udp->udp_v6src = udp->udp_bound_v6src; 5407 udp->udp_state = TS_IDLE; 5408 mutex_exit(&udpf->uf_lock); 5409 if (udp->udp_family == AF_INET6) 5410 (void) udp_build_hdrs(q, udp); 5411 break; 5412 } 5413 5414 if (udp->udp_discon_pending) { 5415 tea->ERROR_prim = T_DISCON_REQ; 5416 udp->udp_discon_pending = 0; 5417 } 5418 V6_SET_ZERO(udp->udp_v6src); 5419 V6_SET_ZERO(udp->udp_bound_v6src); 5420 udp->udp_state = TS_UNBND; 5421 udp_bind_hash_remove(udp, B_TRUE); 5422 udp->udp_port = 0; 5423 mutex_exit(&udpf->uf_lock); 5424 if (udp->udp_family == AF_INET6) 5425 (void) udp_build_hdrs(q, udp); 5426 break; 5427 } 5428 default: 5429 break; 5430 } 5431 break; 5432 case T_BIND_ACK: 5433 udp_rput_bind_ack(q, mp); 5434 return; 5435 5436 case T_OPTMGMT_ACK: 5437 case T_OK_ACK: 5438 break; 5439 default: 5440 freemsg(mp); 5441 return; 5442 } 5443 putnext(UDP_RD(q), mp); 5444 return; 5445 } 5446 5447 
/* 5448 * This is the inbound data path. 5449 * First, we make sure the data contains both IP and UDP headers. 5450 * 5451 * This handle IPv4 packets for only AF_INET sockets. 5452 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 5453 * is no need saving the options. 5454 */ 5455 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 5456 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 5457 if (mp->b_wptr - rptr < hdr_length) { 5458 if (!pullupmsg(mp, hdr_length)) { 5459 freemsg(mp); 5460 if (options_mp != NULL) 5461 freeb(options_mp); 5462 BUMP_MIB(&udp->udp_mib, udpInErrors); 5463 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5464 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 5465 return; 5466 } 5467 rptr = mp->b_rptr; 5468 } 5469 /* Walk past the headers. */ 5470 new_rptr = rptr + hdr_length; 5471 if (!udp->udp_rcvhdr) 5472 mp->b_rptr = new_rptr; 5473 5474 /* Save the options if any */ 5475 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 5476 if (opt_len > 0) { 5477 if (opt_len > udp->udp_ip_rcv_options_len) { 5478 if (udp->udp_ip_rcv_options_len) 5479 mi_free((char *)udp->udp_ip_rcv_options); 5480 udp->udp_ip_rcv_options_len = 0; 5481 udp->udp_ip_rcv_options = 5482 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 5483 if (udp->udp_ip_rcv_options) 5484 udp->udp_ip_rcv_options_len = opt_len; 5485 } 5486 if (udp->udp_ip_rcv_options_len) { 5487 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 5488 udp->udp_ip_rcv_options, opt_len); 5489 /* Adjust length if we are resusing the space */ 5490 udp->udp_ip_rcv_options_len = opt_len; 5491 } 5492 } else if (udp->udp_ip_rcv_options_len) { 5493 mi_free((char *)udp->udp_ip_rcv_options); 5494 udp->udp_ip_rcv_options = NULL; 5495 udp->udp_ip_rcv_options_len = 0; 5496 } 5497 5498 /* 5499 * Normally only send up the address. 5500 * If IP_RECVDSTADDR is set we include the destination IP 5501 * address as an option. With IP_RECVOPTS we include all 5502 * the IP options. 
5503 */ 5504 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5505 if (udp->udp_recvdstaddr) { 5506 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5507 UDP_STAT(us, udp_in_recvdstaddr); 5508 } 5509 5510 if (udp->udp_ip_recvpktinfo && recv_on && 5511 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 5512 udi_size += sizeof (struct T_opthdr) + 5513 sizeof (struct in_pktinfo); 5514 UDP_STAT(us, udp_ip_recvpktinfo); 5515 } 5516 5517 if (udp->udp_recvopts && opt_len > 0) { 5518 udi_size += sizeof (struct T_opthdr) + opt_len; 5519 UDP_STAT(us, udp_in_recvopts); 5520 } 5521 5522 /* 5523 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5524 * space accordingly 5525 */ 5526 if (udp->udp_recvif && recv_on && 5527 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 5528 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5529 UDP_STAT(us, udp_in_recvif); 5530 } 5531 5532 if (udp->udp_recvslla && recv_on && 5533 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 5534 udi_size += sizeof (struct T_opthdr) + 5535 sizeof (struct sockaddr_dl); 5536 UDP_STAT(us, udp_in_recvslla); 5537 } 5538 5539 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5540 udi_size += sizeof (struct T_opthdr) + ucredsize; 5541 cpid = DB_CPID(mp); 5542 UDP_STAT(us, udp_in_recvucred); 5543 } 5544 /* 5545 * If IP_RECVTTL is set allocate the appropriate sized buffer 5546 */ 5547 if (udp->udp_recvttl) { 5548 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5549 UDP_STAT(us, udp_in_recvttl); 5550 } 5551 5552 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 5553 mp1 = allocb(udi_size, BPRI_MED); 5554 if (mp1 == NULL) { 5555 freemsg(mp); 5556 if (options_mp != NULL) 5557 freeb(options_mp); 5558 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5559 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5560 BUMP_MIB(&udp->udp_mib, udpInErrors); 5561 return; 5562 } 5563 mp1->b_cont = mp; 5564 mp = mp1; 5565 mp->b_datap->db_type = M_PROTO; 5566 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5567 mp->b_wptr = (uchar_t *)tudi + udi_size; 5568 tudi->PRIM_type = T_UNITDATA_IND; 5569 tudi->SRC_length = sizeof (sin_t); 5570 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5571 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5572 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5573 tudi->OPT_length = udi_size; 5574 5575 sin = (sin_t *)&tudi[1]; 5576 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5577 sin->sin_port = ((in_port_t *) 5578 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5579 sin->sin_family = AF_INET; 5580 *(uint32_t *)&sin->sin_zero[0] = 0; 5581 *(uint32_t *)&sin->sin_zero[4] = 0; 5582 5583 /* 5584 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5585 * IP_RECVTTL has been set. 5586 */ 5587 if (udi_size != 0) { 5588 /* 5589 * Copy in destination address before options to avoid any 5590 * padding issues. 
5591 */ 5592 char *dstopt; 5593 5594 dstopt = (char *)&sin[1]; 5595 if (udp->udp_recvdstaddr) { 5596 struct T_opthdr *toh; 5597 ipaddr_t *dstptr; 5598 5599 toh = (struct T_opthdr *)dstopt; 5600 toh->level = IPPROTO_IP; 5601 toh->name = IP_RECVDSTADDR; 5602 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5603 toh->status = 0; 5604 dstopt += sizeof (struct T_opthdr); 5605 dstptr = (ipaddr_t *)dstopt; 5606 *dstptr = (((ipaddr_t *)rptr)[4]); 5607 dstopt += sizeof (ipaddr_t); 5608 udi_size -= toh->len; 5609 } 5610 if (udp->udp_recvopts && udi_size != 0) { 5611 struct T_opthdr *toh; 5612 5613 toh = (struct T_opthdr *)dstopt; 5614 toh->level = IPPROTO_IP; 5615 toh->name = IP_RECVOPTS; 5616 toh->len = sizeof (struct T_opthdr) + opt_len; 5617 toh->status = 0; 5618 dstopt += sizeof (struct T_opthdr); 5619 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5620 dstopt += opt_len; 5621 udi_size -= toh->len; 5622 } 5623 if (udp->udp_ip_recvpktinfo && recv_on && 5624 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 5625 5626 struct T_opthdr *toh; 5627 struct in_pktinfo *pktinfop; 5628 5629 toh = (struct T_opthdr *)dstopt; 5630 toh->level = IPPROTO_IP; 5631 toh->name = IP_PKTINFO; 5632 toh->len = sizeof (struct T_opthdr) + 5633 sizeof (*pktinfop); 5634 toh->status = 0; 5635 dstopt += sizeof (struct T_opthdr); 5636 pktinfop = (struct in_pktinfo *)dstopt; 5637 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 5638 pktinfop->ipi_spec_dst = pinfo->ip_pkt_match_addr; 5639 5640 pktinfop->ipi_addr.s_addr = ((ipha_t *)rptr)->ipha_dst; 5641 5642 dstopt += sizeof (struct in_pktinfo); 5643 udi_size -= toh->len; 5644 } 5645 5646 if (udp->udp_recvslla && recv_on && 5647 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 5648 5649 struct T_opthdr *toh; 5650 struct sockaddr_dl *dstptr; 5651 5652 toh = (struct T_opthdr *)dstopt; 5653 toh->level = IPPROTO_IP; 5654 toh->name = IP_RECVSLLA; 5655 toh->len = sizeof (struct T_opthdr) + 5656 sizeof (struct sockaddr_dl); 5657 toh->status = 0; 5658 dstopt += sizeof 
(struct T_opthdr); 5659 dstptr = (struct sockaddr_dl *)dstopt; 5660 bcopy(&pinfo->ip_pkt_slla, dstptr, 5661 sizeof (struct sockaddr_dl)); 5662 dstopt += sizeof (struct sockaddr_dl); 5663 udi_size -= toh->len; 5664 } 5665 5666 if (udp->udp_recvif && recv_on && 5667 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 5668 5669 struct T_opthdr *toh; 5670 uint_t *dstptr; 5671 5672 toh = (struct T_opthdr *)dstopt; 5673 toh->level = IPPROTO_IP; 5674 toh->name = IP_RECVIF; 5675 toh->len = sizeof (struct T_opthdr) + 5676 sizeof (uint_t); 5677 toh->status = 0; 5678 dstopt += sizeof (struct T_opthdr); 5679 dstptr = (uint_t *)dstopt; 5680 *dstptr = pinfo->ip_pkt_ifindex; 5681 dstopt += sizeof (uint_t); 5682 udi_size -= toh->len; 5683 } 5684 5685 if (cr != NULL) { 5686 struct T_opthdr *toh; 5687 5688 toh = (struct T_opthdr *)dstopt; 5689 toh->level = SOL_SOCKET; 5690 toh->name = SCM_UCRED; 5691 toh->len = sizeof (struct T_opthdr) + ucredsize; 5692 toh->status = 0; 5693 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5694 dstopt += toh->len; 5695 udi_size -= toh->len; 5696 } 5697 5698 if (udp->udp_recvttl) { 5699 struct T_opthdr *toh; 5700 uint8_t *dstptr; 5701 5702 toh = (struct T_opthdr *)dstopt; 5703 toh->level = IPPROTO_IP; 5704 toh->name = IP_RECVTTL; 5705 toh->len = sizeof (struct T_opthdr) + 5706 sizeof (uint8_t); 5707 toh->status = 0; 5708 dstopt += sizeof (struct T_opthdr); 5709 dstptr = (uint8_t *)dstopt; 5710 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5711 dstopt += sizeof (uint8_t); 5712 udi_size -= toh->len; 5713 } 5714 5715 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5716 } 5717 BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); 5718 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5719 "udp_rput_other_end: q %p (%S)", q, "end"); 5720 if (options_mp != NULL) 5721 freeb(options_mp); 5722 5723 if (udp->udp_direct_sockfs) { 5724 /* 5725 * There is nothing above us except for the stream head; 5726 * use the read-side synchronous stream interface in 5727 * order to reduce the time spent in 
interrupt thread. 5728 */ 5729 ASSERT(udp->udp_issocket); 5730 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5731 } else { 5732 /* 5733 * Use regular STREAMS interface to pass data upstream 5734 * if this is not a socket endpoint, or if we have 5735 * switched over to the slow mode due to sockmod being 5736 * popped or a module being pushed on top of us. 5737 */ 5738 putnext(UDP_RD(q), mp); 5739 } 5740 } 5741 5742 /* ARGSUSED */ 5743 static void 5744 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5745 { 5746 conn_t *connp = arg; 5747 5748 udp_rput_other(connp->conn_rq, mp); 5749 udp_exit(connp); 5750 } 5751 5752 /* 5753 * Process a T_BIND_ACK 5754 */ 5755 static void 5756 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5757 { 5758 udp_t *udp = Q_TO_UDP(q); 5759 mblk_t *mp1; 5760 ire_t *ire; 5761 struct T_bind_ack *tba; 5762 uchar_t *addrp; 5763 ipa_conn_t *ac; 5764 ipa6_conn_t *ac6; 5765 5766 if (udp->udp_discon_pending) 5767 udp->udp_discon_pending = 0; 5768 5769 /* 5770 * If a broadcast/multicast address was bound set 5771 * the source address to 0. 5772 * This ensures no datagrams with broadcast address 5773 * as source address are emitted (which would violate 5774 * RFC1122 - Hosts requirements) 5775 * 5776 * Note that when connecting the returned IRE is 5777 * for the destination address and we only perform 5778 * the broadcast check for the source address (it 5779 * is OK to connect to a broadcast/multicast address.) 5780 */ 5781 mp1 = mp->b_cont; 5782 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5783 ire = (ire_t *)mp1->b_rptr; 5784 5785 /* 5786 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5787 * local address. 
5788 */ 5789 if (ire->ire_type == IRE_BROADCAST && 5790 udp->udp_state != TS_DATA_XFER) { 5791 /* This was just a local bind to a broadcast addr */ 5792 V6_SET_ZERO(udp->udp_v6src); 5793 if (udp->udp_family == AF_INET6) 5794 (void) udp_build_hdrs(q, udp); 5795 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5796 /* 5797 * Local address not yet set - pick it from the 5798 * T_bind_ack 5799 */ 5800 tba = (struct T_bind_ack *)mp->b_rptr; 5801 addrp = &mp->b_rptr[tba->ADDR_offset]; 5802 switch (udp->udp_family) { 5803 case AF_INET: 5804 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5805 ac = (ipa_conn_t *)addrp; 5806 } else { 5807 ASSERT(tba->ADDR_length == 5808 sizeof (ipa_conn_x_t)); 5809 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5810 } 5811 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5812 &udp->udp_v6src); 5813 break; 5814 case AF_INET6: 5815 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5816 ac6 = (ipa6_conn_t *)addrp; 5817 } else { 5818 ASSERT(tba->ADDR_length == 5819 sizeof (ipa6_conn_x_t)); 5820 ac6 = &((ipa6_conn_x_t *) 5821 addrp)->ac6x_conn; 5822 } 5823 udp->udp_v6src = ac6->ac6_laddr; 5824 (void) udp_build_hdrs(q, udp); 5825 break; 5826 } 5827 } 5828 mp1 = mp1->b_cont; 5829 } 5830 /* 5831 * Look for one or more appended ACK message added by 5832 * udp_connect or udp_disconnect. 5833 * If none found just send up the T_BIND_ACK. 5834 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5835 * udp_disconnect has appended a T_OK_ACK. 
5836 */ 5837 if (mp1 != NULL) { 5838 if (mp->b_cont == mp1) 5839 mp->b_cont = NULL; 5840 else { 5841 ASSERT(mp->b_cont->b_cont == mp1); 5842 mp->b_cont->b_cont = NULL; 5843 } 5844 freemsg(mp); 5845 mp = mp1; 5846 while (mp != NULL) { 5847 mp1 = mp->b_cont; 5848 mp->b_cont = NULL; 5849 putnext(UDP_RD(q), mp); 5850 mp = mp1; 5851 } 5852 return; 5853 } 5854 freemsg(mp->b_cont); 5855 mp->b_cont = NULL; 5856 putnext(UDP_RD(q), mp); 5857 } 5858 5859 /* 5860 * return SNMP stuff in buffer in mpdata 5861 */ 5862 int 5863 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5864 { 5865 mblk_t *mpdata; 5866 mblk_t *mp_conn_ctl; 5867 mblk_t *mp_attr_ctl; 5868 mblk_t *mp6_conn_ctl; 5869 mblk_t *mp6_attr_ctl; 5870 mblk_t *mp_conn_tail; 5871 mblk_t *mp_attr_tail; 5872 mblk_t *mp6_conn_tail; 5873 mblk_t *mp6_attr_tail; 5874 struct opthdr *optp; 5875 mib2_udpEntry_t ude; 5876 mib2_udp6Entry_t ude6; 5877 mib2_transportMLPEntry_t mlp; 5878 int state; 5879 zoneid_t zoneid; 5880 int i; 5881 connf_t *connfp; 5882 conn_t *connp = Q_TO_CONN(q); 5883 udp_t *udp = connp->conn_udp; 5884 int v4_conn_idx; 5885 int v6_conn_idx; 5886 boolean_t needattr; 5887 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5888 5889 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 5890 if (mpctl == NULL || 5891 (mpdata = mpctl->b_cont) == NULL || 5892 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5893 (mp_attr_ctl = copymsg(mpctl)) == NULL || 5894 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 5895 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 5896 freemsg(mp_conn_ctl); 5897 freemsg(mp_attr_ctl); 5898 freemsg(mp6_conn_ctl); 5899 return (0); 5900 } 5901 5902 zoneid = connp->conn_zoneid; 5903 5904 /* fixed length structure for IPv4 and IPv6 counters */ 5905 SET_MIB(udp->udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5906 SET_MIB(udp->udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5907 /* synchronize 64- and 32-bit counters */ 5908 SYNC32_MIB(&udp->udp_mib, udpInDatagrams, udpHCInDatagrams); 5909 SYNC32_MIB(&udp->udp_mib, 
udpOutDatagrams, udpHCOutDatagrams); 5910 5911 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5912 optp->level = MIB2_UDP; 5913 optp->name = 0; 5914 (void) snmp_append_data(mpdata, (char *)&udp->udp_mib, 5915 sizeof (udp->udp_mib)); 5916 optp->len = msgdsize(mpdata); 5917 qreply(q, mpctl); 5918 5919 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 5920 v4_conn_idx = v6_conn_idx = 0; 5921 5922 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5923 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5924 connp = NULL; 5925 5926 while ((connp = ipcl_get_next_conn(connfp, connp, 5927 IPCL_UDP))) { 5928 udp = connp->conn_udp; 5929 if (zoneid != connp->conn_zoneid) 5930 continue; 5931 5932 /* 5933 * Note that the port numbers are sent in 5934 * host byte order 5935 */ 5936 5937 if (udp->udp_state == TS_UNBND) 5938 state = MIB2_UDP_unbound; 5939 else if (udp->udp_state == TS_IDLE) 5940 state = MIB2_UDP_idle; 5941 else if (udp->udp_state == TS_DATA_XFER) 5942 state = MIB2_UDP_connected; 5943 else 5944 state = MIB2_UDP_unknown; 5945 5946 needattr = B_FALSE; 5947 bzero(&mlp, sizeof (mlp)); 5948 if (connp->conn_mlp_type != mlptSingle) { 5949 if (connp->conn_mlp_type == mlptShared || 5950 connp->conn_mlp_type == mlptBoth) 5951 mlp.tme_flags |= MIB2_TMEF_SHARED; 5952 if (connp->conn_mlp_type == mlptPrivate || 5953 connp->conn_mlp_type == mlptBoth) 5954 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 5955 needattr = B_TRUE; 5956 } 5957 5958 /* 5959 * Create an IPv4 table entry for IPv4 entries and also 5960 * any IPv6 entries which are bound to in6addr_any 5961 * (i.e. anything a IPv4 peer could connect/send to). 
5962 */ 5963 if (udp->udp_ipversion == IPV4_VERSION || 5964 (udp->udp_state <= TS_IDLE && 5965 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5966 ude.udpEntryInfo.ue_state = state; 5967 /* 5968 * If in6addr_any this will set it to 5969 * INADDR_ANY 5970 */ 5971 ude.udpLocalAddress = 5972 V4_PART_OF_V6(udp->udp_v6src); 5973 ude.udpLocalPort = ntohs(udp->udp_port); 5974 if (udp->udp_state == TS_DATA_XFER) { 5975 /* 5976 * Can potentially get here for 5977 * v6 socket if another process 5978 * (say, ping) has just done a 5979 * sendto(), changing the state 5980 * from the TS_IDLE above to 5981 * TS_DATA_XFER by the time we hit 5982 * this part of the code. 5983 */ 5984 ude.udpEntryInfo.ue_RemoteAddress = 5985 V4_PART_OF_V6(udp->udp_v6dst); 5986 ude.udpEntryInfo.ue_RemotePort = 5987 ntohs(udp->udp_dstport); 5988 } else { 5989 ude.udpEntryInfo.ue_RemoteAddress = 0; 5990 ude.udpEntryInfo.ue_RemotePort = 0; 5991 } 5992 5993 /* 5994 * We make the assumption that all udp_t 5995 * structs will be created within an address 5996 * region no larger than 32-bits. 5997 */ 5998 ude.udpInstance = (uint32_t)(uintptr_t)udp; 5999 ude.udpCreationProcess = 6000 (udp->udp_open_pid < 0) ? 
6001 MIB2_UNKNOWN_PROCESS : 6002 udp->udp_open_pid; 6003 ude.udpCreationTime = udp->udp_open_time; 6004 6005 (void) snmp_append_data2(mp_conn_ctl->b_cont, 6006 &mp_conn_tail, (char *)&ude, sizeof (ude)); 6007 mlp.tme_connidx = v4_conn_idx++; 6008 if (needattr) 6009 (void) snmp_append_data2( 6010 mp_attr_ctl->b_cont, &mp_attr_tail, 6011 (char *)&mlp, sizeof (mlp)); 6012 } 6013 if (udp->udp_ipversion == IPV6_VERSION) { 6014 ude6.udp6EntryInfo.ue_state = state; 6015 ude6.udp6LocalAddress = udp->udp_v6src; 6016 ude6.udp6LocalPort = ntohs(udp->udp_port); 6017 ude6.udp6IfIndex = udp->udp_bound_if; 6018 if (udp->udp_state == TS_DATA_XFER) { 6019 ude6.udp6EntryInfo.ue_RemoteAddress = 6020 udp->udp_v6dst; 6021 ude6.udp6EntryInfo.ue_RemotePort = 6022 ntohs(udp->udp_dstport); 6023 } else { 6024 ude6.udp6EntryInfo.ue_RemoteAddress = 6025 sin6_null.sin6_addr; 6026 ude6.udp6EntryInfo.ue_RemotePort = 0; 6027 } 6028 /* 6029 * We make the assumption that all udp_t 6030 * structs will be created within an address 6031 * region no larger than 32-bits. 6032 */ 6033 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 6034 ude6.udp6CreationProcess = 6035 (udp->udp_open_pid < 0) ? 6036 MIB2_UNKNOWN_PROCESS : 6037 udp->udp_open_pid; 6038 ude6.udp6CreationTime = udp->udp_open_time; 6039 6040 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 6041 &mp6_conn_tail, (char *)&ude6, 6042 sizeof (ude6)); 6043 mlp.tme_connidx = v6_conn_idx++; 6044 if (needattr) 6045 (void) snmp_append_data2( 6046 mp6_attr_ctl->b_cont, 6047 &mp6_attr_tail, (char *)&mlp, 6048 sizeof (mlp)); 6049 } 6050 } 6051 } 6052 6053 /* IPv4 UDP endpoints */ 6054 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 6055 sizeof (struct T_optmgmt_ack)]; 6056 optp->level = MIB2_UDP; 6057 optp->name = MIB2_UDP_ENTRY; 6058 optp->len = msgdsize(mp_conn_ctl->b_cont); 6059 qreply(q, mp_conn_ctl); 6060 6061 /* table of MLP attributes... 
*/ 6062 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 6063 sizeof (struct T_optmgmt_ack)]; 6064 optp->level = MIB2_UDP; 6065 optp->name = EXPER_XPORT_MLP; 6066 optp->len = msgdsize(mp_attr_ctl->b_cont); 6067 if (optp->len == 0) 6068 freemsg(mp_attr_ctl); 6069 else 6070 qreply(q, mp_attr_ctl); 6071 6072 /* IPv6 UDP endpoints */ 6073 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 6074 sizeof (struct T_optmgmt_ack)]; 6075 optp->level = MIB2_UDP6; 6076 optp->name = MIB2_UDP6_ENTRY; 6077 optp->len = msgdsize(mp6_conn_ctl->b_cont); 6078 qreply(q, mp6_conn_ctl); 6079 6080 /* table of MLP attributes... */ 6081 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 6082 sizeof (struct T_optmgmt_ack)]; 6083 optp->level = MIB2_UDP6; 6084 optp->name = EXPER_XPORT_MLP; 6085 optp->len = msgdsize(mp6_attr_ctl->b_cont); 6086 if (optp->len == 0) 6087 freemsg(mp6_attr_ctl); 6088 else 6089 qreply(q, mp6_attr_ctl); 6090 6091 return (1); 6092 } 6093 6094 /* 6095 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 6096 * NOTE: Per MIB-II, UDP has no writable data. 6097 * TODO: If this ever actually tries to set anything, it needs to be 6098 * to do the appropriate locking. 
6099 */ 6100 /* ARGSUSED */ 6101 int 6102 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 6103 uchar_t *ptr, int len) 6104 { 6105 switch (level) { 6106 case MIB2_UDP: 6107 return (0); 6108 default: 6109 return (1); 6110 } 6111 } 6112 6113 static void 6114 udp_report_item(mblk_t *mp, udp_t *udp) 6115 { 6116 char *state; 6117 char addrbuf1[INET6_ADDRSTRLEN]; 6118 char addrbuf2[INET6_ADDRSTRLEN]; 6119 uint_t print_len, buf_len; 6120 6121 buf_len = mp->b_datap->db_lim - mp->b_wptr; 6122 ASSERT(buf_len >= 0); 6123 if (buf_len == 0) 6124 return; 6125 6126 if (udp->udp_state == TS_UNBND) 6127 state = "UNBOUND"; 6128 else if (udp->udp_state == TS_IDLE) 6129 state = "IDLE"; 6130 else if (udp->udp_state == TS_DATA_XFER) 6131 state = "CONNECTED"; 6132 else 6133 state = "UnkState"; 6134 print_len = snprintf((char *)mp->b_wptr, buf_len, 6135 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 6136 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 6137 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 6138 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 6139 ntohs(udp->udp_dstport), state); 6140 if (print_len < buf_len) { 6141 mp->b_wptr += print_len; 6142 } else { 6143 mp->b_wptr += buf_len; 6144 } 6145 } 6146 6147 /* Report for ndd "udp_status" */ 6148 /* ARGSUSED */ 6149 static int 6150 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 6151 { 6152 zoneid_t zoneid; 6153 connf_t *connfp; 6154 conn_t *connp = Q_TO_CONN(q); 6155 udp_t *udp = connp->conn_udp; 6156 int i; 6157 udp_stack_t *us = udp->udp_us; 6158 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6159 6160 /* 6161 * Because of the ndd constraint, at most we can have 64K buffer 6162 * to put in all UDP info. So to be more efficient, just 6163 * allocate a 64K buffer here, assuming we need that large buffer. 6164 * This may be a problem as any user can read udp_status. 
Therefore 6165 * we limit the rate of doing this using us_ndd_get_info_interval. 6166 * This should be OK as normal users should not do this too often. 6167 */ 6168 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 6169 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 6170 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 6171 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 6172 return (0); 6173 } 6174 } 6175 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 6176 /* The following may work even if we cannot get a large buf. */ 6177 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 6178 return (0); 6179 } 6180 (void) mi_mpprintf(mp, 6181 "UDP " MI_COL_HDRPAD_STR 6182 /* 12345678[89ABCDEF] */ 6183 " zone lport src addr dest addr port state"); 6184 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 6185 6186 zoneid = connp->conn_zoneid; 6187 6188 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 6189 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 6190 connp = NULL; 6191 6192 while ((connp = ipcl_get_next_conn(connfp, connp, 6193 IPCL_UDP))) { 6194 udp = connp->conn_udp; 6195 if (zoneid != GLOBAL_ZONEID && 6196 zoneid != connp->conn_zoneid) 6197 continue; 6198 6199 udp_report_item(mp->b_cont, udp); 6200 } 6201 } 6202 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 6203 return (0); 6204 } 6205 6206 /* 6207 * This routine creates a T_UDERROR_IND message and passes it upstream. 6208 * The address and options are copied from the T_UNITDATA_REQ message 6209 * passed in mp. This message is freed. 
6210 */ 6211 static void 6212 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 6213 t_scalar_t err) 6214 { 6215 struct T_unitdata_req *tudr; 6216 mblk_t *mp1; 6217 uchar_t *optaddr; 6218 t_scalar_t optlen; 6219 6220 if (DB_TYPE(mp) == M_DATA) { 6221 ASSERT(destaddr != NULL && destlen != 0); 6222 optaddr = NULL; 6223 optlen = 0; 6224 } else { 6225 if ((mp->b_wptr < mp->b_rptr) || 6226 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 6227 goto done; 6228 } 6229 tudr = (struct T_unitdata_req *)mp->b_rptr; 6230 destaddr = mp->b_rptr + tudr->DEST_offset; 6231 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 6232 destaddr + tudr->DEST_length < mp->b_rptr || 6233 destaddr + tudr->DEST_length > mp->b_wptr) { 6234 goto done; 6235 } 6236 optaddr = mp->b_rptr + tudr->OPT_offset; 6237 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 6238 optaddr + tudr->OPT_length < mp->b_rptr || 6239 optaddr + tudr->OPT_length > mp->b_wptr) { 6240 goto done; 6241 } 6242 destlen = tudr->DEST_length; 6243 optlen = tudr->OPT_length; 6244 } 6245 6246 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 6247 (char *)optaddr, optlen, err); 6248 if (mp1 != NULL) 6249 putnext(UDP_RD(q), mp1); 6250 6251 done: 6252 freemsg(mp); 6253 } 6254 6255 /* 6256 * This routine removes a port number association from a stream. It 6257 * is called by udp_wput to handle T_UNBIND_REQ messages. 6258 */ 6259 static void 6260 udp_unbind(queue_t *q, mblk_t *mp) 6261 { 6262 udp_t *udp = Q_TO_UDP(q); 6263 6264 /* If a bind has not been done, we can't unbind. 
*/ 6265 if (udp->udp_state == TS_UNBND) { 6266 udp_err_ack(q, mp, TOUTSTATE, 0); 6267 return; 6268 } 6269 if (cl_inet_unbind != NULL) { 6270 /* 6271 * Running in cluster mode - register unbind information 6272 */ 6273 if (udp->udp_ipversion == IPV4_VERSION) { 6274 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 6275 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 6276 (in_port_t)udp->udp_port); 6277 } else { 6278 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 6279 (uint8_t *)&(udp->udp_v6src), 6280 (in_port_t)udp->udp_port); 6281 } 6282 } 6283 6284 udp_bind_hash_remove(udp, B_FALSE); 6285 V6_SET_ZERO(udp->udp_v6src); 6286 V6_SET_ZERO(udp->udp_bound_v6src); 6287 udp->udp_port = 0; 6288 udp->udp_state = TS_UNBND; 6289 6290 if (udp->udp_family == AF_INET6) { 6291 int error; 6292 6293 /* Rebuild the header template */ 6294 error = udp_build_hdrs(q, udp); 6295 if (error != 0) { 6296 udp_err_ack(q, mp, TSYSERR, error); 6297 return; 6298 } 6299 } 6300 /* 6301 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 6302 * and therefore ip_unbind must never return NULL. 6303 */ 6304 mp = ip_unbind(q, mp); 6305 ASSERT(mp != NULL); 6306 putnext(UDP_RD(q), mp); 6307 } 6308 6309 /* 6310 * Don't let port fall into the privileged range. 6311 * Since the extra privileged ports can be arbitrary we also 6312 * ensure that we exclude those from consideration. 6313 * us->us_epriv_ports is not sorted thus we loop over it until 6314 * there are no changes. 6315 */ 6316 static in_port_t 6317 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 6318 { 6319 int i; 6320 in_port_t nextport; 6321 boolean_t restart = B_FALSE; 6322 udp_stack_t *us = udp->udp_us; 6323 6324 if (random && udp_random_anon_port != 0) { 6325 (void) random_get_pseudo_bytes((uint8_t *)&port, 6326 sizeof (in_port_t)); 6327 /* 6328 * Unless changed by a sys admin, the smallest anon port 6329 * is 32768 and the largest anon port is 65535. 
It is 6330 * very likely (50%) for the random port to be smaller 6331 * than the smallest anon port. When that happens, 6332 * add port % (anon port range) to the smallest anon 6333 * port to get the random port. It should fall into the 6334 * valid anon port range. 6335 */ 6336 if (port < us->us_smallest_anon_port) { 6337 port = us->us_smallest_anon_port + 6338 port % (us->us_largest_anon_port - 6339 us->us_smallest_anon_port); 6340 } 6341 } 6342 6343 retry: 6344 if (port < us->us_smallest_anon_port) 6345 port = us->us_smallest_anon_port; 6346 6347 if (port > us->us_largest_anon_port) { 6348 port = us->us_smallest_anon_port; 6349 if (restart) 6350 return (0); 6351 restart = B_TRUE; 6352 } 6353 6354 if (port < us->us_smallest_nonpriv_port) 6355 port = us->us_smallest_nonpriv_port; 6356 6357 for (i = 0; i < us->us_num_epriv_ports; i++) { 6358 if (port == us->us_epriv_ports[i]) { 6359 port++; 6360 /* 6361 * Make sure that the port is in the 6362 * valid range. 6363 */ 6364 goto retry; 6365 } 6366 } 6367 6368 if (is_system_labeled() && 6369 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 6370 port, IPPROTO_UDP, B_TRUE)) != 0) { 6371 port = nextport; 6372 goto retry; 6373 } 6374 6375 return (port); 6376 } 6377 6378 static int 6379 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 6380 { 6381 int err; 6382 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 6383 udp_t *udp = Q_TO_UDP(wq); 6384 6385 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 6386 opt_storage, udp->udp_mac_exempt, 6387 udp->udp_us->us_netstack->netstack_ip); 6388 if (err == 0) { 6389 err = tsol_update_options(&udp->udp_ip_snd_options, 6390 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 6391 opt_storage); 6392 } 6393 if (err != 0) { 6394 DTRACE_PROBE4( 6395 tx__ip__log__info__updatelabel__udp, 6396 char *, "queue(1) failed to update options(2) on mp(3)", 6397 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6398 } else { 6399 IN6_IPADDR_TO_V4MAPPED(dst, 
&udp->udp_v6lastdst); 6400 } 6401 return (err); 6402 } 6403 6404 static mblk_t * 6405 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 6406 uint_t srcid, int *error, boolean_t insert_spi) 6407 { 6408 udp_t *udp = connp->conn_udp; 6409 queue_t *q = connp->conn_wq; 6410 mblk_t *mp1 = mp; 6411 mblk_t *mp2; 6412 ipha_t *ipha; 6413 int ip_hdr_length; 6414 uint32_t ip_len; 6415 udpha_t *udpha; 6416 udpattrs_t attrs; 6417 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 6418 uint32_t ip_snd_opt_len = 0; 6419 ip4_pkt_t pktinfo; 6420 ip4_pkt_t *pktinfop = &pktinfo; 6421 ip_opt_info_t optinfo; 6422 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6423 udp_stack_t *us = udp->udp_us; 6424 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 6425 6426 6427 *error = 0; 6428 pktinfop->ip4_ill_index = 0; 6429 pktinfop->ip4_addr = INADDR_ANY; 6430 optinfo.ip_opt_flags = 0; 6431 optinfo.ip_opt_ill_index = 0; 6432 6433 if (v4dst == INADDR_ANY) 6434 v4dst = htonl(INADDR_LOOPBACK); 6435 6436 /* 6437 * If options passed in, feed it for verification and handling 6438 */ 6439 attrs.udpattr_credset = B_FALSE; 6440 if (DB_TYPE(mp) != M_DATA) { 6441 mp1 = mp->b_cont; 6442 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 6443 attrs.udpattr_ipp4 = pktinfop; 6444 attrs.udpattr_mb = mp; 6445 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 6446 goto done; 6447 /* 6448 * Note: success in processing options. 6449 * mp option buffer represented by 6450 * OPT_length/offset now potentially modified 6451 * and contain option setting results 6452 */ 6453 ASSERT(*error == 0); 6454 } 6455 } 6456 6457 /* mp1 points to the M_DATA mblk carrying the packet */ 6458 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6459 6460 /* 6461 * Check if our saved options are valid; update if not. 
6462 * TSOL Note: Since we are not in WRITER mode, UDP packets 6463 * to different destination may require different labels, 6464 * or worse, UDP packets to same IP address may require 6465 * different labels due to use of shared all-zones address. 6466 * We use conn_lock to ensure that lastdst, ip_snd_options, 6467 * and ip_snd_options_len are consistent for the current 6468 * destination and are updated atomically. 6469 */ 6470 mutex_enter(&connp->conn_lock); 6471 if (is_system_labeled()) { 6472 /* Using UDP MLP requires SCM_UCRED from user */ 6473 if (connp->conn_mlp_type != mlptSingle && 6474 !attrs.udpattr_credset) { 6475 mutex_exit(&connp->conn_lock); 6476 DTRACE_PROBE4( 6477 tx__ip__log__info__output__udp, 6478 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6479 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6480 *error = ECONNREFUSED; 6481 goto done; 6482 } 6483 /* 6484 * update label option for this UDP socket if 6485 * - the destination has changed, or 6486 * - the UDP socket is MLP 6487 */ 6488 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 6489 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 6490 connp->conn_mlp_type != mlptSingle) && 6491 (*error = udp_update_label(q, mp, v4dst)) != 0) { 6492 mutex_exit(&connp->conn_lock); 6493 goto done; 6494 } 6495 } 6496 if (udp->udp_ip_snd_options_len > 0) { 6497 ip_snd_opt_len = udp->udp_ip_snd_options_len; 6498 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 6499 } 6500 mutex_exit(&connp->conn_lock); 6501 6502 /* Add an IP header */ 6503 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 6504 (insert_spi ? 
sizeof (uint32_t) : 0); 6505 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 6506 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 6507 !OK_32PTR(ipha)) { 6508 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 6509 if (mp2 == NULL) { 6510 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6511 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 6512 *error = ENOMEM; 6513 goto done; 6514 } 6515 mp2->b_wptr = DB_LIM(mp2); 6516 mp2->b_cont = mp1; 6517 mp1 = mp2; 6518 if (DB_TYPE(mp) != M_DATA) 6519 mp->b_cont = mp1; 6520 else 6521 mp = mp1; 6522 6523 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 6524 } 6525 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0)); 6526 #ifdef _BIG_ENDIAN 6527 /* Set version, header length, and tos */ 6528 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 6529 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 6530 udp->udp_type_of_service); 6531 /* Set ttl and protocol */ 6532 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 6533 #else 6534 /* Set version, header length, and tos */ 6535 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 6536 ((udp->udp_type_of_service << 8) | 6537 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 6538 /* Set ttl and protocol */ 6539 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 6540 #endif 6541 if (pktinfop->ip4_addr != INADDR_ANY) { 6542 ipha->ipha_src = pktinfop->ip4_addr; 6543 optinfo.ip_opt_flags = IP_VERIFY_SRC; 6544 } else { 6545 /* 6546 * Copy our address into the packet. If this is zero, 6547 * first look at __sin6_src_id for a hint. If we leave the 6548 * source as INADDR_ANY then ip will fill in the real source 6549 * address. 
6550 */ 6551 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 6552 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 6553 in6_addr_t v6src; 6554 6555 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 6556 us->us_netstack); 6557 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 6558 } 6559 } 6560 6561 if (pktinfop->ip4_ill_index != 0) { 6562 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 6563 } 6564 6565 ipha->ipha_fragment_offset_and_flags = 0; 6566 ipha->ipha_ident = 0; 6567 6568 mp1->b_rptr = (uchar_t *)ipha; 6569 6570 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 6571 (uintptr_t)UINT_MAX); 6572 6573 /* Determine length of packet */ 6574 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 6575 if ((mp2 = mp1->b_cont) != NULL) { 6576 do { 6577 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6578 ip_len += (uint32_t)MBLKL(mp2); 6579 } while ((mp2 = mp2->b_cont) != NULL); 6580 } 6581 /* 6582 * If the size of the packet is greater than the maximum allowed by 6583 * ip, return an error. Passing this down could cause panics because 6584 * the size will have wrapped and be inconsistent with the msg size. 6585 */ 6586 if (ip_len > IP_MAXPACKET) { 6587 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6588 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 6589 *error = EMSGSIZE; 6590 goto done; 6591 } 6592 ipha->ipha_length = htons((uint16_t)ip_len); 6593 ip_len -= ip_hdr_length; 6594 ip_len = htons((uint16_t)ip_len); 6595 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 6596 6597 /* Insert all-0s SPI now. */ 6598 if (insert_spi) 6599 *((uint32_t *)(udpha + 1)) = 0; 6600 6601 /* 6602 * Copy in the destination address 6603 */ 6604 ipha->ipha_dst = v4dst; 6605 6606 /* 6607 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 
6608 */ 6609 if (CLASSD(v4dst)) 6610 ipha->ipha_ttl = udp->udp_multicast_ttl; 6611 6612 udpha->uha_dst_port = port; 6613 udpha->uha_src_port = udp->udp_port; 6614 6615 if (ip_snd_opt_len > 0) { 6616 uint32_t cksum; 6617 6618 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 6619 /* 6620 * Massage source route putting first source route in ipha_dst. 6621 * Ignore the destination in T_unitdata_req. 6622 * Create a checksum adjustment for a source route, if any. 6623 */ 6624 cksum = ip_massage_options(ipha, us->us_netstack); 6625 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6626 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 6627 (ipha->ipha_dst & 0xFFFF); 6628 if ((int)cksum < 0) 6629 cksum--; 6630 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6631 /* 6632 * IP does the checksum if uha_checksum is non-zero, 6633 * We make it easy for IP to include our pseudo header 6634 * by putting our length in uha_checksum. 6635 */ 6636 cksum += ip_len; 6637 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6638 /* There might be a carry. */ 6639 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6640 #ifdef _LITTLE_ENDIAN 6641 if (us->us_do_checksum) 6642 ip_len = (cksum << 16) | ip_len; 6643 #else 6644 if (us->us_do_checksum) 6645 ip_len = (ip_len << 16) | cksum; 6646 else 6647 ip_len <<= 16; 6648 #endif 6649 } else { 6650 /* 6651 * IP does the checksum if uha_checksum is non-zero, 6652 * We make it easy for IP to include our pseudo header 6653 * by putting our length in uha_checksum. 6654 */ 6655 if (us->us_do_checksum) 6656 ip_len |= (ip_len << 16); 6657 #ifndef _LITTLE_ENDIAN 6658 else 6659 ip_len <<= 16; 6660 #endif 6661 } 6662 6663 /* Set UDP length and checksum */ 6664 *((uint32_t *)&udpha->uha_length) = ip_len; 6665 if (DB_CRED(mp) != NULL) 6666 mblk_setcred(mp1, DB_CRED(mp)); 6667 6668 if (DB_TYPE(mp) != M_DATA) { 6669 ASSERT(mp != mp1); 6670 freeb(mp); 6671 } 6672 6673 /* mp has been consumed and we'll return success */ 6674 ASSERT(*error == 0); 6675 mp = NULL; 6676 6677 /* We're done. 
Pass the packet to ip. */ 6678 BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams); 6679 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6680 "udp_wput_end: q %p (%S)", q, "end"); 6681 6682 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 6683 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 6684 connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || 6685 connp->conn_nofailover_ill != NULL || 6686 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 6687 optinfo.ip_opt_ill_index != 0 || 6688 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 6689 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 6690 ipst->ips_ip_g_mrouter != NULL) { 6691 UDP_STAT(us, udp_ip_send); 6692 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 6693 &optinfo); 6694 } else { 6695 udp_send_data(udp, connp->conn_wq, mp1, ipha); 6696 } 6697 6698 done: 6699 if (*error != 0) { 6700 ASSERT(mp != NULL); 6701 BUMP_MIB(&udp->udp_mib, udpOutErrors); 6702 } 6703 return (mp); 6704 } 6705 6706 static void 6707 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 6708 { 6709 conn_t *connp = udp->udp_connp; 6710 ipaddr_t src, dst; 6711 ill_t *ill; 6712 ire_t *ire; 6713 ipif_t *ipif = NULL; 6714 mblk_t *ire_fp_mp; 6715 uint_t ire_fp_mp_len; 6716 uint16_t *up; 6717 uint32_t cksum, hcksum_txflags; 6718 queue_t *dev_q; 6719 boolean_t retry_caching; 6720 udp_stack_t *us = udp->udp_us; 6721 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6722 6723 dst = ipha->ipha_dst; 6724 src = ipha->ipha_src; 6725 ASSERT(ipha->ipha_ident == 0); 6726 6727 if (CLASSD(dst)) { 6728 int err; 6729 6730 ipif = conn_get_held_ipif(connp, 6731 &connp->conn_multicast_ipif, &err); 6732 6733 if (ipif == NULL || ipif->ipif_isv6 || 6734 (ipif->ipif_ill->ill_phyint->phyint_flags & 6735 PHYI_LOOPBACK)) { 6736 if (ipif != NULL) 6737 ipif_refrele(ipif); 6738 UDP_STAT(us, udp_ip_send); 6739 ip_output(connp, mp, q, IP_WPUT); 6740 return; 6741 } 6742 } 6743 6744 retry_caching = B_FALSE; 6745 mutex_enter(&connp->conn_lock); 6746 ire 
= connp->conn_ire_cache; 6747 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 6748 6749 if (ire == NULL || ire->ire_addr != dst || 6750 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 6751 retry_caching = B_TRUE; 6752 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 6753 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6754 6755 ASSERT(ipif != NULL); 6756 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6757 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6758 retry_caching = B_TRUE; 6759 } 6760 6761 if (!retry_caching) { 6762 ASSERT(ire != NULL); 6763 IRE_REFHOLD(ire); 6764 mutex_exit(&connp->conn_lock); 6765 } else { 6766 boolean_t cached = B_FALSE; 6767 6768 connp->conn_ire_cache = NULL; 6769 mutex_exit(&connp->conn_lock); 6770 6771 /* Release the old ire */ 6772 if (ire != NULL) { 6773 IRE_REFRELE_NOTR(ire); 6774 ire = NULL; 6775 } 6776 6777 if (CLASSD(dst)) { 6778 ASSERT(ipif != NULL); 6779 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6780 connp->conn_zoneid, MBLK_GETLABEL(mp), 6781 MATCH_IRE_ILL_GROUP, ipst); 6782 } else { 6783 ASSERT(ipif == NULL); 6784 ire = ire_cache_lookup(dst, connp->conn_zoneid, 6785 MBLK_GETLABEL(mp), ipst); 6786 } 6787 6788 if (ire == NULL) { 6789 if (ipif != NULL) 6790 ipif_refrele(ipif); 6791 UDP_STAT(us, udp_ire_null); 6792 ip_output(connp, mp, q, IP_WPUT); 6793 return; 6794 } 6795 IRE_REFHOLD_NOTR(ire); 6796 6797 mutex_enter(&connp->conn_lock); 6798 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL) { 6799 rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6800 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6801 connp->conn_ire_cache = ire; 6802 cached = B_TRUE; 6803 } 6804 rw_exit(&ire->ire_bucket->irb_lock); 6805 } 6806 mutex_exit(&connp->conn_lock); 6807 6808 /* 6809 * We can continue to use the ire but since it was not 6810 * cached, we should drop the extra reference. 
6811 */ 6812 if (!cached) 6813 IRE_REFRELE_NOTR(ire); 6814 } 6815 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6816 ASSERT(!CLASSD(dst) || ipif != NULL); 6817 6818 /* 6819 * Check if we can take the fast-path. 6820 * Note that "incomplete" ire's (where the link-layer for next hop 6821 * is not resolved, or where the fast-path header in nce_fp_mp is not 6822 * available yet) are sent down the legacy (slow) path 6823 */ 6824 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6825 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 6826 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 6827 (connp->conn_nexthop_set) || 6828 (ire->ire_nce == NULL) || 6829 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) || 6830 ((ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp))) { 6831 if (ipif != NULL) 6832 ipif_refrele(ipif); 6833 UDP_STAT(us, udp_ip_ire_send); 6834 IRE_REFRELE(ire); 6835 ip_output(connp, mp, q, IP_WPUT); 6836 return; 6837 } 6838 6839 ill = ire_to_ill(ire); 6840 ASSERT(ill != NULL); 6841 6842 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 6843 6844 dev_q = ire->ire_stq->q_next; 6845 ASSERT(dev_q != NULL); 6846 /* 6847 * If the service thread is already running, or if the driver 6848 * queue is currently flow-controlled, queue this packet. 
6849 */ 6850 if ((q->q_first != NULL || connp->conn_draining) || 6851 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 6852 if (ipst->ips_ip_output_queue) { 6853 (void) putq(q, mp); 6854 } else { 6855 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6856 freemsg(mp); 6857 } 6858 if (ipif != NULL) 6859 ipif_refrele(ipif); 6860 IRE_REFRELE(ire); 6861 return; 6862 } 6863 6864 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6865 #ifndef _BIG_ENDIAN 6866 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6867 #endif 6868 6869 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6870 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6871 src = ipha->ipha_src = ipif->ipif_src_addr; 6872 else 6873 src = ipha->ipha_src = ire->ire_src_addr; 6874 } 6875 6876 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6877 ASSERT(ill->ill_hcksum_capab != NULL); 6878 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6879 } else { 6880 hcksum_txflags = 0; 6881 } 6882 6883 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6884 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6885 6886 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6887 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6888 if (*up != 0) { 6889 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6890 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6891 ntohs(ipha->ipha_length), cksum); 6892 6893 /* Software checksum? 
*/ 6894 if (DB_CKSUMFLAGS(mp) == 0) { 6895 UDP_STAT(us, udp_out_sw_cksum); 6896 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 6897 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6898 } 6899 } 6900 6901 if (!CLASSD(dst)) { 6902 ipha->ipha_fragment_offset_and_flags |= 6903 (uint32_t)htons(ire->ire_frag_flag); 6904 } 6905 6906 /* Calculate IP header checksum if hardware isn't capable */ 6907 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6908 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6909 ((uint16_t *)ipha)[4]); 6910 } 6911 6912 if (CLASSD(dst)) { 6913 ilm_t *ilm; 6914 6915 ILM_WALKER_HOLD(ill); 6916 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6917 ILM_WALKER_RELE(ill); 6918 if (ilm != NULL) { 6919 ip_multicast_loopback(q, ill, mp, 6920 connp->conn_multicast_loop ? 0 : 6921 IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); 6922 } 6923 6924 /* If multicast TTL is 0 then we are done */ 6925 if (ipha->ipha_ttl == 0) { 6926 if (ipif != NULL) 6927 ipif_refrele(ipif); 6928 freemsg(mp); 6929 IRE_REFRELE(ire); 6930 return; 6931 } 6932 } 6933 6934 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6935 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6936 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6937 6938 UPDATE_OB_PKT_COUNT(ire); 6939 ire->ire_last_used_time = lbolt; 6940 6941 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 6942 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 6943 ntohs(ipha->ipha_length)); 6944 6945 if (ILL_DLS_CAPABLE(ill)) { 6946 /* 6947 * Send the packet directly to DLD, where it may be queued 6948 * depending on the availability of transmit resources at 6949 * the media layer. 
6950 */ 6951 IP_DLS_ILL_TX(ill, ipha, mp, ipst); 6952 } else { 6953 DTRACE_PROBE4(ip4__physical__out__start, 6954 ill_t *, NULL, ill_t *, ill, 6955 ipha_t *, ipha, mblk_t *, mp); 6956 FW_HOOKS(ipst->ips_ip4_physical_out_event, 6957 ipst->ips_ipv4firewall_physical_out, 6958 NULL, ill, ipha, mp, mp, ipst); 6959 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 6960 if (mp != NULL) 6961 putnext(ire->ire_stq, mp); 6962 } 6963 6964 if (ipif != NULL) 6965 ipif_refrele(ipif); 6966 IRE_REFRELE(ire); 6967 } 6968 6969 static boolean_t 6970 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 6971 { 6972 udp_t *udp = Q_TO_UDP(wq); 6973 int err; 6974 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 6975 6976 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 6977 dst, opt_storage, udp->udp_mac_exempt, 6978 udp->udp_us->us_netstack->netstack_ip); 6979 if (err == 0) { 6980 err = tsol_update_sticky(&udp->udp_sticky_ipp, 6981 &udp->udp_label_len_v6, opt_storage); 6982 } 6983 if (err != 0) { 6984 DTRACE_PROBE4( 6985 tx__ip__log__drop__updatelabel__udp6, 6986 char *, "queue(1) failed to update options(2) on mp(3)", 6987 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6988 } else { 6989 udp->udp_v6lastdst = *dst; 6990 } 6991 return (err); 6992 } 6993 6994 /* 6995 * This routine handles all messages passed downstream. It either 6996 * consumes the message or passes it downstream; it never queues a 6997 * a message. 
6998 */ 6999 static void 7000 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 7001 { 7002 sin6_t *sin6; 7003 sin_t *sin; 7004 ipaddr_t v4dst; 7005 uint16_t port; 7006 uint_t srcid; 7007 queue_t *q = connp->conn_wq; 7008 udp_t *udp = connp->conn_udp; 7009 int error = 0; 7010 struct sockaddr_storage ss; 7011 udp_stack_t *us = udp->udp_us; 7012 boolean_t insert_spi = udp->udp_nat_t_endpoint; 7013 7014 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 7015 "udp_wput_start: connp %p mp %p", connp, mp); 7016 7017 /* 7018 * We directly handle several cases here: T_UNITDATA_REQ message 7019 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 7020 * connected and non-connected socket. The latter carries the 7021 * address structure along when this routine gets called. 7022 */ 7023 switch (DB_TYPE(mp)) { 7024 case M_DATA: 7025 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 7026 if (!udp->udp_direct_sockfs || 7027 addr == NULL || addrlen == 0) { 7028 /* Not connected; address is required */ 7029 BUMP_MIB(&udp->udp_mib, udpOutErrors); 7030 UDP_STAT(us, udp_out_err_notconn); 7031 freemsg(mp); 7032 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7033 "udp_wput_end: connp %p (%S)", connp, 7034 "not-connected; address required"); 7035 return; 7036 } 7037 ASSERT(udp->udp_issocket); 7038 UDP_DBGSTAT(us, udp_data_notconn); 7039 /* Not connected; do some more checks below */ 7040 break; 7041 } 7042 /* M_DATA for connected socket */ 7043 UDP_DBGSTAT(us, udp_data_conn); 7044 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 7045 7046 /* Initialize addr and addrlen as if they're passed in */ 7047 if (udp->udp_family == AF_INET) { 7048 sin = (sin_t *)&ss; 7049 sin->sin_family = AF_INET; 7050 sin->sin_port = udp->udp_dstport; 7051 sin->sin_addr.s_addr = v4dst; 7052 addr = (struct sockaddr *)sin; 7053 addrlen = sizeof (*sin); 7054 } else { 7055 sin6 = (sin6_t *)&ss; 7056 sin6->sin6_family = AF_INET6; 7057 sin6->sin6_port = udp->udp_dstport; 7058 
sin6->sin6_flowinfo = udp->udp_flowinfo; 7059 sin6->sin6_addr = udp->udp_v6dst; 7060 sin6->sin6_scope_id = 0; 7061 sin6->__sin6_src_id = 0; 7062 addr = (struct sockaddr *)sin6; 7063 addrlen = sizeof (*sin6); 7064 } 7065 7066 if (udp->udp_family == AF_INET || 7067 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 7068 /* 7069 * Handle both AF_INET and AF_INET6; the latter 7070 * for IPV4 mapped destination addresses. Note 7071 * here that both addr and addrlen point to the 7072 * corresponding struct depending on the address 7073 * family of the socket. 7074 */ 7075 mp = udp_output_v4(connp, mp, v4dst, 7076 udp->udp_dstport, 0, &error, insert_spi); 7077 } else { 7078 mp = udp_output_v6(connp, mp, sin6, &error); 7079 } 7080 if (error != 0) { 7081 ASSERT(addr != NULL && addrlen != 0); 7082 goto ud_error; 7083 } 7084 return; 7085 case M_PROTO: 7086 case M_PCPROTO: { 7087 struct T_unitdata_req *tudr; 7088 7089 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 7090 tudr = (struct T_unitdata_req *)mp->b_rptr; 7091 7092 /* Handle valid T_UNITDATA_REQ here */ 7093 if (MBLKL(mp) >= sizeof (*tudr) && 7094 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 7095 if (mp->b_cont == NULL) { 7096 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7097 "udp_wput_end: q %p (%S)", q, "badaddr"); 7098 error = EPROTO; 7099 goto ud_error; 7100 } 7101 7102 if (!MBLKIN(mp, 0, tudr->DEST_offset + 7103 tudr->DEST_length)) { 7104 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7105 "udp_wput_end: q %p (%S)", q, "badaddr"); 7106 error = EADDRNOTAVAIL; 7107 goto ud_error; 7108 } 7109 /* 7110 * If a port has not been bound to the stream, fail. 7111 * This is not a problem when sockfs is directly 7112 * above us, because it will ensure that the socket 7113 * is first bound before allowing data to be sent. 
7114 */ 7115 if (udp->udp_state == TS_UNBND) { 7116 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7117 "udp_wput_end: q %p (%S)", q, "outstate"); 7118 error = EPROTO; 7119 goto ud_error; 7120 } 7121 addr = (struct sockaddr *) 7122 &mp->b_rptr[tudr->DEST_offset]; 7123 addrlen = tudr->DEST_length; 7124 if (tudr->OPT_length != 0) 7125 UDP_STAT(us, udp_out_opt); 7126 break; 7127 } 7128 /* FALLTHRU */ 7129 } 7130 default: 7131 udp_become_writer(connp, mp, udp_wput_other_wrapper, 7132 SQTAG_UDP_OUTPUT); 7133 return; 7134 } 7135 ASSERT(addr != NULL); 7136 7137 switch (udp->udp_family) { 7138 case AF_INET6: 7139 sin6 = (sin6_t *)addr; 7140 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 7141 sin6->sin6_family != AF_INET6) { 7142 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7143 "udp_wput_end: q %p (%S)", q, "badaddr"); 7144 error = EADDRNOTAVAIL; 7145 goto ud_error; 7146 } 7147 7148 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 7149 /* 7150 * Destination is a non-IPv4-compatible IPv6 address. 7151 * Send out an IPv6 format packet. 7152 */ 7153 mp = udp_output_v6(connp, mp, sin6, &error); 7154 if (error != 0) 7155 goto ud_error; 7156 7157 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7158 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 7159 return; 7160 } 7161 /* 7162 * If the local address is not zero or a mapped address 7163 * return an error. It would be possible to send an IPv4 7164 * packet but the response would never make it back to the 7165 * application since it is bound to a non-mapped address. 
7166 */ 7167 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 7168 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7169 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7170 "udp_wput_end: q %p (%S)", q, "badaddr"); 7171 error = EADDRNOTAVAIL; 7172 goto ud_error; 7173 } 7174 /* Send IPv4 packet without modifying udp_ipversion */ 7175 /* Extract port and ipaddr */ 7176 port = sin6->sin6_port; 7177 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 7178 srcid = sin6->__sin6_src_id; 7179 break; 7180 7181 case AF_INET: 7182 sin = (sin_t *)addr; 7183 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 7184 sin->sin_family != AF_INET) { 7185 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7186 "udp_wput_end: q %p (%S)", q, "badaddr"); 7187 error = EADDRNOTAVAIL; 7188 goto ud_error; 7189 } 7190 /* Extract port and ipaddr */ 7191 port = sin->sin_port; 7192 v4dst = sin->sin_addr.s_addr; 7193 srcid = 0; 7194 break; 7195 } 7196 7197 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi); 7198 if (error != 0) { 7199 ud_error: 7200 UDP_STAT(us, udp_out_err_output); 7201 ASSERT(mp != NULL); 7202 /* mp is freed by the following routine */ 7203 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 7204 (t_scalar_t)error); 7205 } 7206 } 7207 7208 /* ARGSUSED */ 7209 static void 7210 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 7211 { 7212 udp_output((conn_t *)arg, mp, NULL, 0); 7213 _UDP_EXIT((conn_t *)arg); 7214 } 7215 7216 static void 7217 udp_wput(queue_t *q, mblk_t *mp) 7218 { 7219 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 7220 SQTAG_UDP_WPUT); 7221 } 7222 7223 /* 7224 * Allocate and prepare a T_UNITDATA_REQ message. 
7225 */ 7226 static mblk_t * 7227 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 7228 { 7229 struct T_unitdata_req *tudr; 7230 mblk_t *mp; 7231 7232 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 7233 if (mp != NULL) { 7234 mp->b_wptr += sizeof (*tudr) + addrlen; 7235 DB_TYPE(mp) = M_PROTO; 7236 7237 tudr = (struct T_unitdata_req *)mp->b_rptr; 7238 tudr->PRIM_type = T_UNITDATA_REQ; 7239 tudr->DEST_length = addrlen; 7240 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 7241 tudr->OPT_length = 0; 7242 tudr->OPT_offset = 0; 7243 bcopy(addr, tudr+1, addrlen); 7244 } 7245 return (mp); 7246 } 7247 7248 /* 7249 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 7250 * is valid when we are directly beneath the stream head, and thus sockfs 7251 * is able to bypass STREAMS and directly call us, passing along the sockaddr 7252 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 7253 * this is done for both connected and non-connected endpoint. 7254 */ 7255 void 7256 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 7257 { 7258 conn_t *connp; 7259 udp_t *udp; 7260 udp_stack_t *us; 7261 7262 q = UDP_WR(q); 7263 connp = Q_TO_CONN(q); 7264 udp = connp->conn_udp; 7265 us = udp->udp_us; 7266 7267 /* udpsockfs should only send down M_DATA for this entry point */ 7268 ASSERT(DB_TYPE(mp) == M_DATA); 7269 7270 mutex_enter(&connp->conn_lock); 7271 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 7272 7273 if (udp->udp_mode != UDP_MT_HOT) { 7274 /* 7275 * We can't enter this conn right away because another 7276 * thread is currently executing as writer; therefore we 7277 * need to deposit the message into the squeue to be 7278 * drained later. If a socket address is present, we 7279 * need to create a T_UNITDATA_REQ message as placeholder. 
7280 */ 7281 if (addr != NULL && addrlen != 0) { 7282 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 7283 7284 if (tudr_mp == NULL) { 7285 mutex_exit(&connp->conn_lock); 7286 BUMP_MIB(&udp->udp_mib, udpOutErrors); 7287 UDP_STAT(us, udp_out_err_tudr); 7288 freemsg(mp); 7289 return; 7290 } 7291 /* Tag the packet with T_UNITDATA_REQ */ 7292 tudr_mp->b_cont = mp; 7293 mp = tudr_mp; 7294 } 7295 mutex_exit(&connp->conn_lock); 7296 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 7297 return; 7298 } 7299 7300 /* We can execute as reader right away. */ 7301 UDP_READERS_INCREF(udp); 7302 mutex_exit(&connp->conn_lock); 7303 7304 udp_output(connp, mp, addr, addrlen); 7305 7306 udp_exit(connp); 7307 } 7308 7309 /* 7310 * udp_output_v6(): 7311 * Assumes that udp_wput did some sanity checking on the destination 7312 * address. 7313 */ 7314 static mblk_t * 7315 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) 7316 { 7317 ip6_t *ip6h; 7318 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 7319 mblk_t *mp1 = mp; 7320 mblk_t *mp2; 7321 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7322 size_t ip_len; 7323 udpha_t *udph; 7324 udp_t *udp = connp->conn_udp; 7325 queue_t *q = connp->conn_wq; 7326 ip6_pkt_t ipp_s; /* For ancillary data options */ 7327 ip6_pkt_t *ipp = &ipp_s; 7328 ip6_pkt_t *tipp; /* temporary ipp */ 7329 uint32_t csum = 0; 7330 uint_t ignore = 0; 7331 uint_t option_exists = 0, is_sticky = 0; 7332 uint8_t *cp; 7333 uint8_t *nxthdr_ptr; 7334 in6_addr_t ip6_dst; 7335 udpattrs_t attrs; 7336 boolean_t opt_present; 7337 ip6_hbh_t *hopoptsptr = NULL; 7338 uint_t hopoptslen = 0; 7339 boolean_t is_ancillary = B_FALSE; 7340 udp_stack_t *us = udp->udp_us; 7341 7342 *error = 0; 7343 7344 /* 7345 * If the local address is a mapped address return 7346 * an error. 7347 * It would be possible to send an IPv6 packet but the 7348 * response would never make it back to the application 7349 * since it is bound to a mapped address. 
7350 */ 7351 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 7352 *error = EADDRNOTAVAIL; 7353 goto done; 7354 } 7355 7356 ipp->ipp_fields = 0; 7357 ipp->ipp_sticky_ignored = 0; 7358 7359 /* 7360 * If TPI options passed in, feed it for verification and handling 7361 */ 7362 attrs.udpattr_credset = B_FALSE; 7363 opt_present = B_FALSE; 7364 if (DB_TYPE(mp) != M_DATA) { 7365 mp1 = mp->b_cont; 7366 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 7367 attrs.udpattr_ipp6 = ipp; 7368 attrs.udpattr_mb = mp; 7369 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 7370 goto done; 7371 ASSERT(*error == 0); 7372 opt_present = B_TRUE; 7373 } 7374 } 7375 ignore = ipp->ipp_sticky_ignored; 7376 7377 /* mp1 points to the M_DATA mblk carrying the packet */ 7378 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 7379 7380 if (sin6->sin6_scope_id != 0 && 7381 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 7382 /* 7383 * IPPF_SCOPE_ID is special. It's neither a sticky 7384 * option nor ancillary data. It needs to be 7385 * explicitly set in options_exists. 7386 */ 7387 option_exists |= IPPF_SCOPE_ID; 7388 } 7389 7390 /* 7391 * Compute the destination address 7392 */ 7393 ip6_dst = sin6->sin6_addr; 7394 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 7395 ip6_dst = ipv6_loopback; 7396 7397 /* 7398 * If we're not going to the same destination as last time, then 7399 * recompute the label required. This is done in a separate routine to 7400 * avoid blowing up our stack here. 7401 * 7402 * TSOL Note: Since we are not in WRITER mode, UDP packets 7403 * to different destination may require different labels, 7404 * or worse, UDP packets to same IP address may require 7405 * different labels due to use of shared all-zones address. 7406 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 7407 * and sticky ipp_hopoptslen are consistent for the current 7408 * destination and are updated atomically. 
7409 */ 7410 mutex_enter(&connp->conn_lock); 7411 if (is_system_labeled()) { 7412 /* Using UDP MLP requires SCM_UCRED from user */ 7413 if (connp->conn_mlp_type != mlptSingle && 7414 !attrs.udpattr_credset) { 7415 DTRACE_PROBE4( 7416 tx__ip__log__info__output__udp6, 7417 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 7418 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 7419 *error = ECONNREFUSED; 7420 mutex_exit(&connp->conn_lock); 7421 goto done; 7422 } 7423 /* 7424 * update label option for this UDP socket if 7425 * - the destination has changed, or 7426 * - the UDP socket is MLP 7427 */ 7428 if ((opt_present || 7429 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 7430 connp->conn_mlp_type != mlptSingle) && 7431 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 7432 mutex_exit(&connp->conn_lock); 7433 goto done; 7434 } 7435 } 7436 7437 /* 7438 * If there's a security label here, then we ignore any options the 7439 * user may try to set. We keep the peer's label as a hidden sticky 7440 * option. We make a private copy of this label before releasing the 7441 * lock so that label is kept consistent with the destination addr. 7442 */ 7443 if (udp->udp_label_len_v6 > 0) { 7444 ignore &= ~IPPF_HOPOPTS; 7445 ipp->ipp_fields &= ~IPPF_HOPOPTS; 7446 } 7447 7448 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 7449 /* No sticky options nor ancillary data. */ 7450 mutex_exit(&connp->conn_lock); 7451 goto no_options; 7452 } 7453 7454 /* 7455 * Go through the options figuring out where each is going to 7456 * come from and build two masks. The first mask indicates if 7457 * the option exists at all. The second mask indicates if the 7458 * option is sticky or ancillary. 
7459 */ 7460 if (!(ignore & IPPF_HOPOPTS)) { 7461 if (ipp->ipp_fields & IPPF_HOPOPTS) { 7462 option_exists |= IPPF_HOPOPTS; 7463 udp_ip_hdr_len += ipp->ipp_hopoptslen; 7464 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 7465 option_exists |= IPPF_HOPOPTS; 7466 is_sticky |= IPPF_HOPOPTS; 7467 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 7468 hopoptsptr = kmem_alloc( 7469 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 7470 if (hopoptsptr == NULL) { 7471 *error = ENOMEM; 7472 mutex_exit(&connp->conn_lock); 7473 goto done; 7474 } 7475 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 7476 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 7477 hopoptslen); 7478 udp_ip_hdr_len += hopoptslen; 7479 } 7480 } 7481 mutex_exit(&connp->conn_lock); 7482 7483 if (!(ignore & IPPF_RTHDR)) { 7484 if (ipp->ipp_fields & IPPF_RTHDR) { 7485 option_exists |= IPPF_RTHDR; 7486 udp_ip_hdr_len += ipp->ipp_rthdrlen; 7487 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 7488 option_exists |= IPPF_RTHDR; 7489 is_sticky |= IPPF_RTHDR; 7490 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 7491 } 7492 } 7493 7494 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 7495 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 7496 option_exists |= IPPF_RTDSTOPTS; 7497 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 7498 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 7499 option_exists |= IPPF_RTDSTOPTS; 7500 is_sticky |= IPPF_RTDSTOPTS; 7501 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 7502 } 7503 } 7504 7505 if (!(ignore & IPPF_DSTOPTS)) { 7506 if (ipp->ipp_fields & IPPF_DSTOPTS) { 7507 option_exists |= IPPF_DSTOPTS; 7508 udp_ip_hdr_len += ipp->ipp_dstoptslen; 7509 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 7510 option_exists |= IPPF_DSTOPTS; 7511 is_sticky |= IPPF_DSTOPTS; 7512 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 7513 } 7514 } 7515 7516 if (!(ignore & IPPF_IFINDEX)) { 7517 if (ipp->ipp_fields & IPPF_IFINDEX) { 7518 
option_exists |= IPPF_IFINDEX; 7519 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 7520 option_exists |= IPPF_IFINDEX; 7521 is_sticky |= IPPF_IFINDEX; 7522 } 7523 } 7524 7525 if (!(ignore & IPPF_ADDR)) { 7526 if (ipp->ipp_fields & IPPF_ADDR) { 7527 option_exists |= IPPF_ADDR; 7528 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 7529 option_exists |= IPPF_ADDR; 7530 is_sticky |= IPPF_ADDR; 7531 } 7532 } 7533 7534 if (!(ignore & IPPF_DONTFRAG)) { 7535 if (ipp->ipp_fields & IPPF_DONTFRAG) { 7536 option_exists |= IPPF_DONTFRAG; 7537 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 7538 option_exists |= IPPF_DONTFRAG; 7539 is_sticky |= IPPF_DONTFRAG; 7540 } 7541 } 7542 7543 if (!(ignore & IPPF_USE_MIN_MTU)) { 7544 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 7545 option_exists |= IPPF_USE_MIN_MTU; 7546 } else if (udp->udp_sticky_ipp.ipp_fields & 7547 IPPF_USE_MIN_MTU) { 7548 option_exists |= IPPF_USE_MIN_MTU; 7549 is_sticky |= IPPF_USE_MIN_MTU; 7550 } 7551 } 7552 7553 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 7554 option_exists |= IPPF_HOPLIMIT; 7555 /* IPV6_HOPLIMIT can never be sticky */ 7556 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 7557 7558 if (!(ignore & IPPF_UNICAST_HOPS) && 7559 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 7560 option_exists |= IPPF_UNICAST_HOPS; 7561 is_sticky |= IPPF_UNICAST_HOPS; 7562 } 7563 7564 if (!(ignore & IPPF_MULTICAST_HOPS) && 7565 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 7566 option_exists |= IPPF_MULTICAST_HOPS; 7567 is_sticky |= IPPF_MULTICAST_HOPS; 7568 } 7569 7570 if (!(ignore & IPPF_TCLASS)) { 7571 if (ipp->ipp_fields & IPPF_TCLASS) { 7572 option_exists |= IPPF_TCLASS; 7573 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 7574 option_exists |= IPPF_TCLASS; 7575 is_sticky |= IPPF_TCLASS; 7576 } 7577 } 7578 7579 if (!(ignore & IPPF_NEXTHOP) && 7580 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 7581 option_exists |= 
IPPF_NEXTHOP; 7582 is_sticky |= IPPF_NEXTHOP; 7583 } 7584 7585 no_options: 7586 7587 /* 7588 * If any options carried in the ip6i_t were specified, we 7589 * need to account for the ip6i_t in the data we'll be sending 7590 * down. 7591 */ 7592 if (option_exists & IPPF_HAS_IP6I) 7593 udp_ip_hdr_len += sizeof (ip6i_t); 7594 7595 /* check/fix buffer config, setup pointers into it */ 7596 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 7597 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 7598 !OK_32PTR(ip6h)) { 7599 /* Try to get everything in a single mblk next time */ 7600 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 7601 udp->udp_max_hdr_len = udp_ip_hdr_len; 7602 (void) mi_set_sth_wroff(UDP_RD(q), 7603 udp->udp_max_hdr_len + us->us_wroff_extra); 7604 } 7605 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 7606 if (mp2 == NULL) { 7607 *error = ENOMEM; 7608 goto done; 7609 } 7610 mp2->b_wptr = DB_LIM(mp2); 7611 mp2->b_cont = mp1; 7612 mp1 = mp2; 7613 if (DB_TYPE(mp) != M_DATA) 7614 mp->b_cont = mp1; 7615 else 7616 mp = mp1; 7617 7618 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 7619 } 7620 mp1->b_rptr = (unsigned char *)ip6h; 7621 ip6i = (ip6i_t *)ip6h; 7622 7623 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 7624 if (option_exists & IPPF_HAS_IP6I) { 7625 ip6h = (ip6_t *)&ip6i[1]; 7626 ip6i->ip6i_flags = 0; 7627 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7628 7629 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 7630 if (option_exists & IPPF_SCOPE_ID) { 7631 ip6i->ip6i_flags |= IP6I_IFINDEX; 7632 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 7633 } else if (option_exists & IPPF_IFINDEX) { 7634 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 7635 ASSERT(tipp->ipp_ifindex != 0); 7636 ip6i->ip6i_flags |= IP6I_IFINDEX; 7637 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 7638 } 7639 7640 if (option_exists & IPPF_ADDR) { 7641 /* 7642 * Enable per-packet source address verification if 7643 * IPV6_PKTINFO specified the source address. 7644 * ip6_src is set in the transport's _wput function. 7645 */ 7646 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 7647 } 7648 7649 if (option_exists & IPPF_DONTFRAG) { 7650 ip6i->ip6i_flags |= IP6I_DONTFRAG; 7651 } 7652 7653 if (option_exists & IPPF_USE_MIN_MTU) { 7654 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 7655 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 7656 } 7657 7658 if (option_exists & IPPF_NEXTHOP) { 7659 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 7660 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 7661 ip6i->ip6i_flags |= IP6I_NEXTHOP; 7662 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 7663 } 7664 7665 /* 7666 * tell IP this is an ip6i_t private header 7667 */ 7668 ip6i->ip6i_nxt = IPPROTO_RAW; 7669 } 7670 7671 /* Initialize IPv6 header */ 7672 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7673 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7674 7675 /* Set the hoplimit of the outgoing packet. */ 7676 if (option_exists & IPPF_HOPLIMIT) { 7677 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 7678 ip6h->ip6_hops = ipp->ipp_hoplimit; 7679 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7680 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7681 ip6h->ip6_hops = udp->udp_multicast_ttl; 7682 if (option_exists & IPPF_MULTICAST_HOPS) 7683 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7684 } else { 7685 ip6h->ip6_hops = udp->udp_ttl; 7686 if (option_exists & IPPF_UNICAST_HOPS) 7687 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7688 } 7689 7690 if (option_exists & IPPF_ADDR) { 7691 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7692 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7693 ip6h->ip6_src = tipp->ipp_addr; 7694 } else { 7695 /* 7696 * The source address was not set using IPV6_PKTINFO. 7697 * First look at the bound source. 7698 * If unspecified fallback to __sin6_src_id. 7699 */ 7700 ip6h->ip6_src = udp->udp_v6src; 7701 if (sin6->__sin6_src_id != 0 && 7702 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7703 ip_srcid_find_id(sin6->__sin6_src_id, 7704 &ip6h->ip6_src, connp->conn_zoneid, 7705 us->us_netstack); 7706 } 7707 } 7708 7709 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7710 cp = (uint8_t *)&ip6h[1]; 7711 7712 /* 7713 * Here's where we have to start stringing together 7714 * any extension headers in the right order: 7715 * Hop-by-hop, destination, routing, and final destination opts. 
7716 */ 7717 if (option_exists & IPPF_HOPOPTS) { 7718 /* Hop-by-hop options */ 7719 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7720 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7721 if (hopoptslen == 0) { 7722 hopoptsptr = tipp->ipp_hopopts; 7723 hopoptslen = tipp->ipp_hopoptslen; 7724 is_ancillary = B_TRUE; 7725 } 7726 7727 *nxthdr_ptr = IPPROTO_HOPOPTS; 7728 nxthdr_ptr = &hbh->ip6h_nxt; 7729 7730 bcopy(hopoptsptr, cp, hopoptslen); 7731 cp += hopoptslen; 7732 7733 if (hopoptsptr != NULL && !is_ancillary) { 7734 kmem_free(hopoptsptr, hopoptslen); 7735 hopoptsptr = NULL; 7736 hopoptslen = 0; 7737 } 7738 } 7739 /* 7740 * En-route destination options 7741 * Only do them if there's a routing header as well 7742 */ 7743 if (option_exists & IPPF_RTDSTOPTS) { 7744 ip6_dest_t *dst = (ip6_dest_t *)cp; 7745 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7746 7747 *nxthdr_ptr = IPPROTO_DSTOPTS; 7748 nxthdr_ptr = &dst->ip6d_nxt; 7749 7750 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7751 cp += tipp->ipp_rtdstoptslen; 7752 } 7753 /* 7754 * Routing header next 7755 */ 7756 if (option_exists & IPPF_RTHDR) { 7757 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7758 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7759 7760 *nxthdr_ptr = IPPROTO_ROUTING; 7761 nxthdr_ptr = &rt->ip6r_nxt; 7762 7763 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7764 cp += tipp->ipp_rthdrlen; 7765 } 7766 /* 7767 * Do ultimate destination options 7768 */ 7769 if (option_exists & IPPF_DSTOPTS) { 7770 ip6_dest_t *dest = (ip6_dest_t *)cp; 7771 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7772 7773 *nxthdr_ptr = IPPROTO_DSTOPTS; 7774 nxthdr_ptr = &dest->ip6d_nxt; 7775 7776 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7777 cp += tipp->ipp_dstoptslen; 7778 } 7779 /* 7780 * Now set the last header pointer to the proto passed in 7781 */ 7782 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7783 *nxthdr_ptr = IPPROTO_UDP; 7784 7785 /* Update UDP header */ 7786 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 7787 udph->uha_dst_port = sin6->sin6_port; 7788 udph->uha_src_port = udp->udp_port; 7789 7790 /* 7791 * Copy in the destination address 7792 */ 7793 ip6h->ip6_dst = ip6_dst; 7794 7795 ip6h->ip6_vcf = 7796 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 7797 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7798 7799 if (option_exists & IPPF_TCLASS) { 7800 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7801 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7802 tipp->ipp_tclass); 7803 } 7804 7805 if (option_exists & IPPF_RTHDR) { 7806 ip6_rthdr_t *rth; 7807 7808 /* 7809 * Perform any processing needed for source routing. 7810 * We know that all extension headers will be in the same mblk 7811 * as the IPv6 header. 7812 */ 7813 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7814 if (rth != NULL && rth->ip6r_segleft != 0) { 7815 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7816 /* 7817 * Drop packet - only support Type 0 routing. 7818 * Notify the application as well. 7819 */ 7820 *error = EPROTO; 7821 goto done; 7822 } 7823 7824 /* 7825 * rth->ip6r_len is twice the number of 7826 * addresses in the header. Thus it must be even. 7827 */ 7828 if (rth->ip6r_len & 0x1) { 7829 *error = EPROTO; 7830 goto done; 7831 } 7832 /* 7833 * Shuffle the routing header and ip6_dst 7834 * addresses, and get the checksum difference 7835 * between the first hop (in ip6_dst) and 7836 * the destination (in the last routing hdr entry). 7837 */ 7838 csum = ip_massage_options_v6(ip6h, rth, 7839 us->us_netstack); 7840 /* 7841 * Verify that the first hop isn't a mapped address. 7842 * Routers along the path need to do this verification 7843 * for subsequent hops. 
7844 */ 7845 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7846 *error = EADDRNOTAVAIL; 7847 goto done; 7848 } 7849 7850 cp += (rth->ip6r_len + 1)*8; 7851 } 7852 } 7853 7854 /* count up length of UDP packet */ 7855 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7856 if ((mp2 = mp1->b_cont) != NULL) { 7857 do { 7858 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7859 ip_len += (uint32_t)MBLKL(mp2); 7860 } while ((mp2 = mp2->b_cont) != NULL); 7861 } 7862 7863 /* 7864 * If the size of the packet is greater than the maximum allowed by 7865 * ip, return an error. Passing this down could cause panics because 7866 * the size will have wrapped and be inconsistent with the msg size. 7867 */ 7868 if (ip_len > IP_MAXPACKET) { 7869 *error = EMSGSIZE; 7870 goto done; 7871 } 7872 7873 /* Store the UDP length. Subtract length of extension hdrs */ 7874 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7875 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7876 7877 /* 7878 * We make it easy for IP to include our pseudo header 7879 * by putting our length in uh_checksum, modified (if 7880 * we have a routing header) by the checksum difference 7881 * between the ultimate destination and first hop addresses. 7882 * Note: UDP over IPv6 must always checksum the packet. 7883 */ 7884 csum += udph->uha_length; 7885 csum = (csum & 0xFFFF) + (csum >> 16); 7886 udph->uha_checksum = (uint16_t)csum; 7887 7888 #ifdef _LITTLE_ENDIAN 7889 ip_len = htons(ip_len); 7890 #endif 7891 ip6h->ip6_plen = ip_len; 7892 if (DB_CRED(mp) != NULL) 7893 mblk_setcred(mp1, DB_CRED(mp)); 7894 7895 if (DB_TYPE(mp) != M_DATA) { 7896 ASSERT(mp != mp1); 7897 freeb(mp); 7898 } 7899 7900 /* mp has been consumed and we'll return success */ 7901 ASSERT(*error == 0); 7902 mp = NULL; 7903 7904 /* We're done. 
Pass the packet to IP */
	BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams);
	ip_output_v6(connp, mp1, q, IP_WPUT);

done:
	if (hopoptsptr != NULL && !is_ancillary) {
		kmem_free(hopoptsptr, hopoptslen);
		hopoptsptr = NULL;
	}
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&udp->udp_mib, udpOutErrors);
	}
	return (mp);
}

/*
 * udp_wput_other dispatches a message on its db_type:
 *
 *   M_PROTO/M_PCPROTO	TPI primitives, dispatched again on the primitive
 *			type found at b_rptr.
 *   M_FLUSH		flushes this queue's data when FLUSHW is set.
 *   M_IOCTL		TI_GETPEERNAME/TI_GETMYNAME, ND_GET/ND_SET and
 *			_SIOCSOCKFALLBACK.
 *   M_IOCDATA		handed to udp_wput_iocdata().
 *
 * A case that fully handles the message returns from inside the switch.
 * Any case that breaks out of the switch (including unrecognized message
 * types and unrecognized primitives/ioctls) ends up at the bottom of the
 * function, where the message is passed to ip_output().
 *
 * q is presumably the write-side queue (the read side is always reached
 * through UDP_RD(q)) -- TODO confirm against the callers.
 */
static void
udp_wput_other(queue_t *q, mblk_t *mp)
{
	uchar_t	*rptr = mp->b_rptr;
	struct datab *db;
	struct iocblk *iocp;
	cred_t	*cr;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;
	udp_stack_t *us;

	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
	    "udp_wput_other_start: q %p", q);

	us = udp->udp_us;
	db = mp->b_datap;

	/* Credential passed to the option-management requests below. */
	cr = DB_CREDDEF(mp, connp->conn_cred);

	switch (db->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		/* Too short to even hold the primitive type: drop it. */
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			freemsg(mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "protoshort");
			return;
		}
		switch (((t_primp_t)rptr)->type) {
		case T_ADDR_REQ:
			udp_addr_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "addrreq");
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			udp_bind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "bindreq");
			return;
		case T_CONN_REQ:
			udp_connect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "connreq");
			return;
		case T_CAPABILITY_REQ:
			udp_capability_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "capabreq");
			return;
		case T_INFO_REQ:
			udp_info_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unitdatareq");
			return;
		case T_UNBIND_REQ:
			udp_unbind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
			return;
		case T_SVR4_OPTMGMT_REQ:
			/*
			 * snmpcom_req() gets the first look; only when it
			 * returns false is the request treated as a regular
			 * option management request.
			 */
			if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr))
				/*
				 * Use upper queue for option processing in
				 * case the request is not handled at this
				 * level and needs to be passed down to IP.
				 */
				(void) svr4_optcom_req(_WR(UDP_RD(q)),
				    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
			return;

		case T_OPTMGMT_REQ:
			/*
			 * Use upper queue for option processing in
			 * case the request is not handled at this
			 * level and needs to be passed down to IP.
			 */
			(void) tpi_optcom_req(_WR(UDP_RD(q)),
			    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
			return;

		case T_DISCON_REQ:
			udp_disconnect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "disconreq");
			return;

		/* The following TPI message is not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q,
			    "connres/disconreq");
			return;

		/* The following 3 TPI messages are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q,
			    "data/exdata/ordrel");
			return;
		default:
			/* Unknown primitive: falls through to ip_output(). */
			break;
		}
		break;
	case M_FLUSH:
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)", q,
				    "getpeername");
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME: {
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "getmyname");
			return;
			}
		case ND_SET:
			/* nd_getset performs the necessary checking */
		case ND_GET:
			/*
			 * When nd_getset() returns true the message is sent
			 * back upstream; otherwise it falls through to
			 * ip_output() at the bottom.
			 */
			if (nd_getset(q, us->us_nd, mp)) {
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)", q, "get");
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we could
			 * no longer use read-side synchronous stream.
			 * Drain any queued data and disable direct sockfs
			 * interface from now on.
			 */
			if (!udp->udp_issocket) {
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp->udp_issocket = B_FALSE;
				if (udp->udp_direct_sockfs) {
					/*
					 * Disable read-side synchronous
					 * stream interface and drain any
					 * queued data.
					 */
					udp_rcv_drain(UDP_RD(q), udp,
					    B_FALSE);
					ASSERT(!udp->udp_direct_sockfs);
					UDP_STAT(us, udp_sock_fallback);
				}
				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			/* Reply (ACK or NAK) goes back upstream. */
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			putnext(UDP_RD(q), mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
	    "udp_wput_other_end: q %p (%S)", q, "end");
	ip_output(connp, mp, q, IP_WPUT);
}

/*
 * Adapter with an (arg, mp, arg2) signature: arg is treated as a conn_t,
 * udp_wput_other() is run on that conn's conn_wq, and udp_exit() is then
 * called on the conn.  arg2 is not used.
 */
/* ARGSUSED */
static void
udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_wput_other(((conn_t *)arg)->conn_wq, mp);
	udp_exit((conn_t *)arg);
}

/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages.
 *
 * Only TI_GETMYNAME and TI_GETPEERNAME are processed here; any other
 * ioc_cmd is passed to ip_output().  The work is driven by
 * mi_copy_state(): once the caller's strbuf has been copied in, the
 * local or peer address is built as a sin_t (AF_INET) or sin6_t
 * (AF_INET6) and copied out, followed by the updated strbuf, followed by
 * the final acknowledgement.
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t	*mp1;
	STRUCT_HANDLE(strbuf, sb);
	uint16_t port;
	in6_addr_t	v6addr;
	ipaddr_t	v4addr;
	uint32_t	flowinfo = 0;
	int		addrlen;
	udp_t		*udp = Q_TO_UDP(q);

	/* Make sure it is one of ours. */
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		ip_output(udp->udp_connp, mp, q, IP_WPUT);
		return;
	}

	/* From here on q refers to the queue derived from UDP_RD(q). */
	q = WR(UDP_RD(q));
	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		/* The strbuf has been copied in; build the address below. */
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME.  Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
	    (void *)mp1->b_rptr);
	if (udp->udp_family == AF_INET)
		addrlen = sizeof (sin_t);
	else
		addrlen = sizeof (sin6_t);

	/* The caller's buffer must be able to hold the whole address. */
	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v4addr = V4_PART_OF_V6(udp->udp_v6src);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				v4addr = V4_PART_OF_V6(udp->udp_bound_v6src);
			}
		} else {
			/* udp->udp_family == AF_INET6 */
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v6addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				v6addr = udp->udp_bound_v6src;
			}
		}
		port = udp->udp_port;
		break;
	case TI_GETPEERNAME:
		if (udp->udp_state != TS_DATA_XFER) {
			mi_copy_done(q, mp, ENOTCONN);
			return;
		}
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			v4addr = V4_PART_OF_V6(udp->udp_v6dst);
		} else {
			/* udp->udp_family == AF_INET6) */
			v6addr = udp->udp_v6dst;
			flowinfo = udp->udp_flowinfo;
		}
		port = udp->udp_dstport;
		break;
	default:
		mi_copy_done(q, mp, EPROTO);
		return;
	}
	/* mp1 is reused for the buffer that will carry the address out. */
	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
	if (!mp1)
		return;

	if (udp->udp_family == AF_INET) {
		sin_t *sin;

		STRUCT_FSET(sb, len, (int)sizeof (sin_t));
		sin = (sin_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin[1];
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = v4addr;
		sin->sin_port = port;
	} else {
		/* udp->udp_family == AF_INET6 */
		sin6_t *sin6;

		STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
		sin6 = (sin6_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin6[1];
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_flowinfo = flowinfo;
		sin6->sin6_addr = v6addr;
		sin6->sin6_port = port;
	}
	/* Copy out the address */
	mi_copyout(q, mp);
}


/*
 * Run tpi_optcom_buf() over the option buffer described by the
 * T_unitdata_req at mp->b_rptr (OPT_length/OPT_offset), passing udpattrs
 * through to it.  The result is stored in *errorp.
 *
 * Returns 0 when *errorp is 0, and -1 otherwise.
 */
static int
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
    udpattrs_t *udpattrs)
{
	struct T_unitdata_req *udreqp;
	int is_absreq_failure;
	cred_t *cr;
	conn_t	*connp = Q_TO_CONN(q);

	ASSERT(((t_primp_t)mp->b_rptr)->type);

	cr = DB_CREDDEF(mp, connp->conn_cred);

	udreqp = (struct T_unitdata_req *)mp->b_rptr;

	/*
	 * Use upper queue for option processing since the callback
	 * routines expect to be called in UDP instance instead of IP.
	 */
	*errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length,
	    udreqp->OPT_offset, cr, &udp_opt_obj,
	    udpattrs, &is_absreq_failure);

	if (*errorp != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		return (-1);		/* failure */
	}
	ASSERT(is_absreq_failure == 0);
	return (0);	/* success */
}

/*
 * Module-wide setup: record the major number for UDP6, compute
 * udp_max_optsize from the option table in udp_opt_obj, create the
 * udp_cache kmem cache of udp_t objects, and register the per-netstack
 * init/fini callbacks.
 */
void
udp_ddi_init(void)
{
	UDP6_MAJ = ddi_name_to_major(UDP6);
	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t),
	    CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of udp_stack_t's.
	 */
	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
}

/*
 * Module-wide teardown: unregister the netstack callbacks, then destroy
 * the udp_cache kmem cache.
 */
void
udp_ddi_destroy(void)
{
	netstack_unregister(NS_UDP);

	kmem_cache_destroy(udp_cache);
}

/*
 * Initialize the UDP stack instance.
8367 */ 8368 static void * 8369 udp_stack_init(netstackid_t stackid, netstack_t *ns) 8370 { 8371 udp_stack_t *us; 8372 udpparam_t *pa; 8373 int i; 8374 8375 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 8376 us->us_netstack = ns; 8377 8378 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 8379 us->us_epriv_ports[0] = 2049; 8380 us->us_epriv_ports[1] = 4045; 8381 8382 /* 8383 * The smallest anonymous port in the priviledged port range which UDP 8384 * looks for free port. Use in the option UDP_ANONPRIVBIND. 8385 */ 8386 us->us_min_anonpriv_port = 512; 8387 8388 us->us_bind_fanout_size = udp_bind_fanout_size; 8389 8390 /* Roundup variable that might have been modified in /etc/system */ 8391 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 8392 /* Not a power of two. Round up to nearest power of two */ 8393 for (i = 0; i < 31; i++) { 8394 if (us->us_bind_fanout_size < (1 << i)) 8395 break; 8396 } 8397 us->us_bind_fanout_size = 1 << i; 8398 } 8399 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 8400 sizeof (udp_fanout_t), KM_SLEEP); 8401 for (i = 0; i < us->us_bind_fanout_size; i++) { 8402 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 8403 NULL); 8404 } 8405 8406 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 8407 8408 us->us_param_arr = pa; 8409 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 8410 8411 (void) udp_param_register(&us->us_nd, 8412 us->us_param_arr, A_CNT(udp_param_arr)); 8413 8414 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 8415 us->us_mibkp = udp_kstat_init(stackid); 8416 return (us); 8417 } 8418 8419 /* 8420 * Free the UDP stack instance. 
8421 */ 8422 static void 8423 udp_stack_fini(netstackid_t stackid, void *arg) 8424 { 8425 udp_stack_t *us = (udp_stack_t *)arg; 8426 int i; 8427 8428 for (i = 0; i < us->us_bind_fanout_size; i++) { 8429 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 8430 } 8431 8432 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 8433 sizeof (udp_fanout_t)); 8434 8435 us->us_bind_fanout = NULL; 8436 8437 nd_free(&us->us_nd); 8438 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 8439 us->us_param_arr = NULL; 8440 8441 udp_kstat_fini(stackid, us->us_mibkp); 8442 us->us_mibkp = NULL; 8443 8444 udp_kstat2_fini(stackid, us->us_kstat); 8445 us->us_kstat = NULL; 8446 bzero(&us->us_statistics, sizeof (us->us_statistics)); 8447 kmem_free(us, sizeof (*us)); 8448 } 8449 8450 static void * 8451 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 8452 { 8453 kstat_t *ksp; 8454 8455 udp_stat_t template = { 8456 { "udp_ip_send", KSTAT_DATA_UINT64 }, 8457 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 8458 { "udp_ire_null", KSTAT_DATA_UINT64 }, 8459 { "udp_drain", KSTAT_DATA_UINT64 }, 8460 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 8461 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 8462 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 8463 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 8464 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 8465 { "udp_out_opt", KSTAT_DATA_UINT64 }, 8466 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 8467 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 8468 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 8469 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 8470 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 8471 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 8472 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 8473 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 8474 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 8475 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 8476 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 8477 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 8478 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 
}, 8479 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 8480 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 8481 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 8482 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 8483 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 8484 #ifdef DEBUG 8485 { "udp_data_conn", KSTAT_DATA_UINT64 }, 8486 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 8487 #endif 8488 }; 8489 8490 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 8491 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 8492 KSTAT_FLAG_VIRTUAL, stackid); 8493 8494 if (ksp == NULL) 8495 return (NULL); 8496 8497 bcopy(&template, us_statisticsp, sizeof (template)); 8498 ksp->ks_data = (void *)us_statisticsp; 8499 ksp->ks_private = (void *)(uintptr_t)stackid; 8500 8501 kstat_install(ksp); 8502 return (ksp); 8503 } 8504 8505 static void 8506 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 8507 { 8508 if (ksp != NULL) { 8509 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 8510 kstat_delete_netstack(ksp, stackid); 8511 } 8512 } 8513 8514 static void * 8515 udp_kstat_init(netstackid_t stackid) 8516 { 8517 kstat_t *ksp; 8518 8519 udp_named_kstat_t template = { 8520 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 8521 { "inErrors", KSTAT_DATA_UINT32, 0 }, 8522 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 8523 { "entrySize", KSTAT_DATA_INT32, 0 }, 8524 { "entry6Size", KSTAT_DATA_INT32, 0 }, 8525 { "outErrors", KSTAT_DATA_UINT32, 0 }, 8526 }; 8527 8528 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 8529 KSTAT_TYPE_NAMED, 8530 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 8531 8532 if (ksp == NULL || ksp->ks_data == NULL) 8533 return (NULL); 8534 8535 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 8536 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 8537 8538 bcopy(&template, ksp->ks_data, sizeof (template)); 8539 ksp->ks_update = udp_kstat_update; 8540 ksp->ks_private = (void *)(uintptr_t)stackid; 8541 8542 kstat_install(ksp); 8543 
return (ksp); 8544 } 8545 8546 static void 8547 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 8548 { 8549 if (ksp != NULL) { 8550 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 8551 kstat_delete_netstack(ksp, stackid); 8552 } 8553 } 8554 8555 static int 8556 udp_kstat_update(kstat_t *kp, int rw) 8557 { 8558 udp_named_kstat_t *udpkp; 8559 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 8560 netstack_t *ns; 8561 udp_stack_t *us; 8562 8563 if ((kp == NULL) || (kp->ks_data == NULL)) 8564 return (EIO); 8565 8566 if (rw == KSTAT_WRITE) 8567 return (EACCES); 8568 8569 ns = netstack_find_by_stackid(stackid); 8570 if (ns == NULL) 8571 return (-1); 8572 us = ns->netstack_udp; 8573 if (us == NULL) { 8574 netstack_rele(ns); 8575 return (-1); 8576 } 8577 udpkp = (udp_named_kstat_t *)kp->ks_data; 8578 8579 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 8580 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 8581 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 8582 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 8583 netstack_rele(ns); 8584 return (0); 8585 } 8586 8587 /* ARGSUSED */ 8588 static void 8589 udp_rput(queue_t *q, mblk_t *mp) 8590 { 8591 /* 8592 * We get here whenever we do qreply() from IP, 8593 * i.e as part of handlings ioctls, etc. 8594 */ 8595 putnext(q, mp); 8596 } 8597 8598 /* 8599 * Read-side synchronous stream info entry point, called as a 8600 * result of handling certain STREAMS ioctl operations. 
8601 */ 8602 static int 8603 udp_rinfop(queue_t *q, infod_t *dp) 8604 { 8605 mblk_t *mp; 8606 uint_t cmd = dp->d_cmd; 8607 int res = 0; 8608 int error = 0; 8609 udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); 8610 struct stdata *stp = STREAM(q); 8611 8612 mutex_enter(&udp->udp_drain_lock); 8613 /* If shutdown on read has happened, return nothing */ 8614 mutex_enter(&stp->sd_lock); 8615 if (stp->sd_flag & STREOF) { 8616 mutex_exit(&stp->sd_lock); 8617 goto done; 8618 } 8619 mutex_exit(&stp->sd_lock); 8620 8621 if ((mp = udp->udp_rcv_list_head) == NULL) 8622 goto done; 8623 8624 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 8625 8626 if (cmd & INFOD_COUNT) { 8627 /* 8628 * Return the number of messages. 8629 */ 8630 dp->d_count += udp->udp_rcv_msgcnt; 8631 res |= INFOD_COUNT; 8632 } 8633 if (cmd & INFOD_BYTES) { 8634 /* 8635 * Return size of all data messages. 8636 */ 8637 dp->d_bytes += udp->udp_rcv_cnt; 8638 res |= INFOD_BYTES; 8639 } 8640 if (cmd & INFOD_FIRSTBYTES) { 8641 /* 8642 * Return size of first data message. 8643 */ 8644 dp->d_bytes = msgdsize(mp); 8645 res |= INFOD_FIRSTBYTES; 8646 dp->d_cmd &= ~INFOD_FIRSTBYTES; 8647 } 8648 if (cmd & INFOD_COPYOUT) { 8649 mblk_t *mp1 = mp->b_cont; 8650 int n; 8651 /* 8652 * Return data contents of first message. 8653 */ 8654 ASSERT(DB_TYPE(mp1) == M_DATA); 8655 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 8656 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 8657 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 8658 UIO_READ, dp->d_uiop)) != 0) { 8659 goto done; 8660 } 8661 mp1 = mp1->b_cont; 8662 } 8663 res |= INFOD_COPYOUT; 8664 dp->d_cmd &= ~INFOD_COPYOUT; 8665 } 8666 done: 8667 mutex_exit(&udp->udp_drain_lock); 8668 8669 dp->d_res |= res; 8670 8671 return (error); 8672 } 8673 8674 /* 8675 * Read-side synchronous stream entry point. This is called as a result 8676 * of recv/read operation done at sockfs, and is guaranteed to execute 8677 * outside of the interrupt thread context. 
It returns a single datagram 8678 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 8679 */ 8680 static int 8681 udp_rrw(queue_t *q, struiod_t *dp) 8682 { 8683 mblk_t *mp; 8684 udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); 8685 udp_stack_t *us = udp->udp_us; 8686 8687 /* We should never get here when we're in SNMP mode */ 8688 ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); 8689 8690 /* 8691 * Dequeue datagram from the head of the list and return 8692 * it to caller; also ensure that RSLEEP sd_wakeq flag is 8693 * set/cleared depending on whether or not there's data 8694 * remaining in the list. 8695 */ 8696 mutex_enter(&udp->udp_drain_lock); 8697 if (!udp->udp_direct_sockfs) { 8698 mutex_exit(&udp->udp_drain_lock); 8699 UDP_STAT(us, udp_rrw_busy); 8700 return (EBUSY); 8701 } 8702 if ((mp = udp->udp_rcv_list_head) != NULL) { 8703 uint_t size = msgdsize(mp); 8704 8705 /* Last datagram in the list? */ 8706 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 8707 udp->udp_rcv_list_tail = NULL; 8708 mp->b_next = NULL; 8709 8710 udp->udp_rcv_cnt -= size; 8711 udp->udp_rcv_msgcnt--; 8712 UDP_STAT(us, udp_rrw_msgcnt); 8713 8714 /* No longer flow-controlling? */ 8715 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 8716 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 8717 udp->udp_drain_qfull = B_FALSE; 8718 } 8719 if (udp->udp_rcv_list_head == NULL) { 8720 /* 8721 * Either we just dequeued the last datagram or 8722 * we get here from sockfs and have nothing to 8723 * return; in this case clear RSLEEP. 8724 */ 8725 ASSERT(udp->udp_rcv_cnt == 0); 8726 ASSERT(udp->udp_rcv_msgcnt == 0); 8727 ASSERT(udp->udp_rcv_list_tail == NULL); 8728 STR_WAKEUP_CLEAR(STREAM(q)); 8729 } else { 8730 /* 8731 * More data follows; we need udp_rrw() to be 8732 * called in future to pick up the rest. 
8733 */ 8734 STR_WAKEUP_SET(STREAM(q)); 8735 } 8736 mutex_exit(&udp->udp_drain_lock); 8737 dp->d_mp = mp; 8738 return (0); 8739 } 8740 8741 /* 8742 * Enqueue a completely-built T_UNITDATA_IND message into the receive 8743 * list; this is typically executed within the interrupt thread context 8744 * and so we do things as quickly as possible. 8745 */ 8746 static void 8747 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 8748 { 8749 ASSERT(q == RD(q)); 8750 ASSERT(pkt_len == msgdsize(mp)); 8751 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 8752 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 8753 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 8754 8755 mutex_enter(&udp->udp_drain_lock); 8756 /* 8757 * Wake up and signal the receiving app; it is okay to do this 8758 * before enqueueing the mp because we are holding the drain lock. 8759 * One of the advantages of synchronous stream is the ability for 8760 * us to find out when the application performs a read on the 8761 * socket by way of udp_rrw() entry point being called. We need 8762 * to generate SIGPOLL/SIGIO for each received data in the case 8763 * of asynchronous socket just as in the strrput() case. However, 8764 * we only wake the application up when necessary, i.e. during the 8765 * first enqueue. When udp_rrw() is called, we send up a single 8766 * datagram upstream and call STR_WAKEUP_SET() again when there 8767 * are still data remaining in our receive queue. 8768 */ 8769 if (udp->udp_rcv_list_head == NULL) { 8770 STR_WAKEUP_SET(STREAM(q)); 8771 udp->udp_rcv_list_head = mp; 8772 } else { 8773 udp->udp_rcv_list_tail->b_next = mp; 8774 } 8775 udp->udp_rcv_list_tail = mp; 8776 udp->udp_rcv_cnt += pkt_len; 8777 udp->udp_rcv_msgcnt++; 8778 8779 /* Need to flow-control? 
*/ 8780 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 8781 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 8782 udp->udp_drain_qfull = B_TRUE; 8783 8784 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 8785 STR_SENDSIG(STREAM(q)); 8786 mutex_exit(&udp->udp_drain_lock); 8787 } 8788 8789 /* 8790 * Drain the contents of receive list to the module upstream; we do 8791 * this during close or when we fallback to the slow mode due to 8792 * sockmod being popped or a module being pushed on top of us. 8793 */ 8794 static void 8795 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 8796 { 8797 mblk_t *mp; 8798 udp_stack_t *us = udp->udp_us; 8799 8800 ASSERT(q == RD(q)); 8801 8802 mutex_enter(&udp->udp_drain_lock); 8803 /* 8804 * There is no race with a concurrent udp_input() sending 8805 * up packets using putnext() after we have cleared the 8806 * udp_direct_sockfs flag but before we have completed 8807 * sending up the packets in udp_rcv_list, since we are 8808 * either a writer or we have quiesced the conn. 8809 */ 8810 udp->udp_direct_sockfs = B_FALSE; 8811 mutex_exit(&udp->udp_drain_lock); 8812 8813 if (udp->udp_rcv_list_head != NULL) 8814 UDP_STAT(us, udp_drain); 8815 8816 /* 8817 * Send up everything via putnext(); note here that we 8818 * don't need the udp_drain_lock to protect us since 8819 * nothing can enter udp_rrw() and that we currently 8820 * have exclusive access to this udp. 
8821 */ 8822 while ((mp = udp->udp_rcv_list_head) != NULL) { 8823 udp->udp_rcv_list_head = mp->b_next; 8824 mp->b_next = NULL; 8825 udp->udp_rcv_cnt -= msgdsize(mp); 8826 udp->udp_rcv_msgcnt--; 8827 if (closing) { 8828 freemsg(mp); 8829 } else { 8830 putnext(q, mp); 8831 } 8832 } 8833 ASSERT(udp->udp_rcv_cnt == 0); 8834 ASSERT(udp->udp_rcv_msgcnt == 0); 8835 ASSERT(udp->udp_rcv_list_head == NULL); 8836 udp->udp_rcv_list_tail = NULL; 8837 udp->udp_drain_qfull = B_FALSE; 8838 } 8839 8840 static size_t 8841 udp_set_rcv_hiwat(udp_t *udp, size_t size) 8842 { 8843 udp_stack_t *us = udp->udp_us; 8844 8845 /* We add a bit of extra buffering */ 8846 size += size >> 1; 8847 if (size > us->us_max_buf) 8848 size = us->us_max_buf; 8849 8850 udp->udp_rcv_hiwat = size; 8851 return (size); 8852 } 8853