/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc.
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/sdt.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 67 #include <inet/common.h> 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_if.h> 73 #include <inet/ip_multi.h> 74 #include <inet/ip_ndp.h> 75 #include <inet/mi.h> 76 #include <inet/mib2.h> 77 #include <inet/nd.h> 78 #include <inet/optcom.h> 79 #include <inet/snmpcom.h> 80 #include <inet/kstatcom.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipclassifier.h> 83 #include <inet/ipsec_impl.h> 84 #include <inet/ipp_common.h> 85 86 /* 87 * The ipsec_info.h header file is here since it has the definition for the 88 * M_CTL message types used by IP to convey information to the ULP. The 89 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
90 */ 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 94 #include <sys/tsol/label.h> 95 #include <sys/tsol/tnet.h> 96 #include <rpc/pmap_prot.h> 97 98 /* 99 * Synchronization notes: 100 * 101 * UDP uses a combination of its internal perimeter, a global lock and 102 * a set of bind hash locks to protect its data structures. Please see 103 * the note above udp_mode_assertions for details about the internal 104 * perimeter. 105 * 106 * When a UDP endpoint is bound to a local port, it is inserted into 107 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 108 * The size of the array is controlled by the udp_bind_fanout_size variable. 109 * This variable can be changed in /etc/system if the default value is 110 * not large enough. Each bind hash bucket is protected by a per bucket 111 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 112 * structure. An UDP endpoint is removed from the bind hash list only 113 * when it is being unbound or being closed. The per bucket lock also 114 * protects a UDP endpoint's state changes. 115 * 116 * Plumbing notes: 117 * 118 * Both udp and ip are merged, but the streams plumbing is kept unchanged 119 * in that udp is always pushed atop /dev/ip. This is done to preserve 120 * backwards compatibility for certain applications which rely on such 121 * plumbing geometry to do things such as issuing I_POP on the stream 122 * in order to obtain direct access to /dev/ip, etc. 123 * 124 * All UDP processings happen in the /dev/ip instance; the udp module 125 * instance does not possess any state about the endpoint, and merely 126 * acts as a dummy module whose presence is to keep the streams plumbing 127 * appearance unchanged. At open time /dev/ip allocates a conn_t that 128 * happens to embed a udp_t. This stays dormant until the time udp is 129 * pushed, which indicates to /dev/ip that it must convert itself from 130 * an IP to a UDP endpoint. 
*
 * We only allow for the following plumbing cases:
 *
 * Normal:
 *	/dev/ip is first opened and later udp is pushed directly on top.
 *	This is the default action that happens when a udp socket or
 *	/dev/udp is opened.  The conn_t created by /dev/ip instance is
 *	now shared and is marked with IPCL_UDP.
 *
 * SNMP-only:
 *	udp is pushed on top of a module other than /dev/ip.  When this
 *	happens it will support only SNMP semantics.  A new conn_t is
 *	allocated and marked with IPCL_UDPMOD.
 *
 * The above cases imply that we don't support any intermediate module to
 * reside in between /dev/ip and udp -- in fact, we never supported such a
 * scenario in the past as the inter-layer communication semantics have
 * always been private.  Also note that the normal case allows for SNMP
 * requests to be processed in addition to the rest of UDP operations.
 *
 * The normal case plumbing is depicted by the following diagram:
 *
 *	+---------------+---------------+
 *	|               |               | udp
 *	|    udp_wq     |    udp_rq     |
 *	|               |    UDP_RD     |
 *	|               |               |
 *	+---------------+---------------+
 *	        |               ^
 *	        v               |
 *	+---------------+---------------+
 *	|               |               | /dev/ip
 *	|     ip_wq     |     ip_rq     | conn_t
 *	|     UDP_WR    |               |
 *	|               |               |
 *	+---------------+---------------+
 *
 * Messages arriving at udp_wq from above will end up in ip_wq before
 * they get processed, i.e. udp write entry points will advance udp_wq
 * and use its q_next value as ip_wq in order to use the conn_t that
 * is stored in its q_ptr.  Likewise, messages generated by ip to the
 * module above udp will appear as if they are originated from udp_rq,
 * i.e. putnext() calls to the module above udp are done using the
 * udp_rq instead of ip_rq in order to avoid udp_rput() which does
 * nothing more than calling putnext().
 *
 * The above implies the following rule of thumb:
 *
 *	1.
udp_t is obtained from conn_t, which is created by the /dev/ip 180 * instance and is stored in q_ptr of both ip_wq and ip_rq. There 181 * is no direct reference to conn_t from either udp_wq or udp_rq. 182 * 183 * 2. Write-side entry points of udp can obtain the conn_t via the 184 * Q_TO_CONN() macro, using the queue value obtain from UDP_WR(). 185 * 186 * 3. While in /dev/ip context, putnext() to the module above udp can 187 * be done by supplying the queue value obtained from UDP_RD(). 188 * 189 */ 190 191 static queue_t *UDP_WR(queue_t *); 192 static queue_t *UDP_RD(queue_t *); 193 194 struct kmem_cache *udp_cache; 195 196 /* For /etc/system control */ 197 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 198 199 #define NDD_TOO_QUICK_MSG \ 200 "ndd get info rate too high for non-privileged users, try again " \ 201 "later.\n" 202 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 203 204 /* Option processing attrs */ 205 typedef struct udpattrs_s { 206 union { 207 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 208 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 209 } udpattr_ippu; 210 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 211 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 212 mblk_t *udpattr_mb; 213 boolean_t udpattr_credset; 214 } udpattrs_t; 215 216 static void udp_addr_req(queue_t *q, mblk_t *mp); 217 static void udp_bind(queue_t *q, mblk_t *mp); 218 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 219 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 220 static int udp_build_hdrs(queue_t *q, udp_t *udp); 221 static void udp_capability_req(queue_t *q, mblk_t *mp); 222 static int udp_close(queue_t *q); 223 static void udp_connect(queue_t *q, mblk_t *mp); 224 static void udp_disconnect(queue_t *q, mblk_t *mp); 225 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 226 int sys_error); 227 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 228 t_scalar_t tlierr, int unixerr); 229 static int 
udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 230 cred_t *cr); 231 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 232 char *value, caddr_t cp, cred_t *cr); 233 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 234 char *value, caddr_t cp, cred_t *cr); 235 static void udp_icmp_error(queue_t *q, mblk_t *mp); 236 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 237 static void udp_info_req(queue_t *q, mblk_t *mp); 238 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 239 t_scalar_t addr_length); 240 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 241 cred_t *credp); 242 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 243 int *errorp, udpattrs_t *udpattrs); 244 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 245 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 246 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 247 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 248 cred_t *cr); 249 static void udp_report_item(mblk_t *mp, udp_t *udp); 250 static void udp_rput(queue_t *q, mblk_t *mp); 251 static void udp_rput_other(queue_t *, mblk_t *); 252 static int udp_rinfop(queue_t *q, infod_t *dp); 253 static int udp_rrw(queue_t *q, struiod_t *dp); 254 static void udp_rput_bind_ack(queue_t *q, mblk_t *mp); 255 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 256 cred_t *cr); 257 static void udp_send_data(udp_t *, queue_t *, mblk_t *, ipha_t *); 258 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 259 t_scalar_t destlen, t_scalar_t err); 260 static void udp_unbind(queue_t *q, mblk_t *mp); 261 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 262 boolean_t random); 263 static void udp_wput(queue_t *q, mblk_t *mp); 264 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 265 int *, boolean_t); 266 static mblk_t 
*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 267 int *error); 268 static void udp_wput_other(queue_t *q, mblk_t *mp); 269 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 270 static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 271 socklen_t addrlen); 272 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 273 274 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 275 static void udp_stack_fini(netstackid_t stackid, void *arg); 276 277 static void *udp_kstat_init(netstackid_t stackid); 278 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 279 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 280 static void udp_kstat2_fini(netstackid_t, kstat_t *); 281 static int udp_kstat_update(kstat_t *kp, int rw); 282 static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2); 283 static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 284 static void udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 285 static void udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2); 286 287 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 288 uint_t pkt_len); 289 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 290 static void udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t); 291 static void udp_exit(conn_t *); 292 static void udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t); 293 #ifdef DEBUG 294 static void udp_mode_assertions(udp_t *, int); 295 #endif /* DEBUG */ 296 297 major_t UDP6_MAJ; 298 #define UDP6 "udp6" 299 300 #define UDP_RECV_HIWATER (56 * 1024) 301 #define UDP_RECV_LOWATER 128 302 #define UDP_XMIT_HIWATER (56 * 1024) 303 #define UDP_XMIT_LOWATER 1024 304 305 static struct module_info udp_info = { 306 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 307 }; 308 309 static struct qinit udp_rinit = { 310 (pfi_t)udp_rput, NULL, udp_open, udp_close, NULL, 311 &udp_info, NULL, udp_rrw, udp_rinfop, 
STRUIOT_STANDARD 312 }; 313 314 static struct qinit udp_winit = { 315 (pfi_t)udp_wput, NULL, NULL, NULL, NULL, 316 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 317 }; 318 319 /* Support for just SNMP if UDP is not pushed directly over device IP */ 320 struct qinit udp_snmp_rinit = { 321 (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL, 322 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 323 }; 324 325 struct qinit udp_snmp_winit = { 326 (pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL, 327 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 328 }; 329 330 struct streamtab udpinfo = { 331 &udp_rinit, &udp_winit 332 }; 333 334 static sin_t sin_null; /* Zero address for quick clears */ 335 static sin6_t sin6_null; /* Zero address for quick clears */ 336 337 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 338 339 /* Default structure copied into T_INFO_ACK messages */ 340 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 341 T_INFO_ACK, 342 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 343 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 344 T_INVALID, /* CDATA_size. udp does not support connect data. */ 345 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 346 sizeof (sin_t), /* ADDR_size. */ 347 0, /* OPT_size - not initialized here */ 348 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 349 T_CLTS, /* SERV_type. udp supports connection-less. */ 350 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 351 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 352 }; 353 354 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 355 356 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 357 T_INFO_ACK, 358 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 359 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 360 T_INVALID, /* CDATA_size. udp does not support connect data. */ 361 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 362 sizeof (sin6_t), /* ADDR_size. 
*/ 363 0, /* OPT_size - not initialized here */ 364 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 365 T_CLTS, /* SERV_type. udp supports connection-less. */ 366 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 367 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 368 }; 369 370 /* largest UDP port number */ 371 #define UDP_MAX_PORT 65535 372 373 /* 374 * Table of ND variables supported by udp. These are loaded into us_nd 375 * in udp_open. 376 * All of these are alterable, within the min/max values given, at run time. 377 */ 378 /* BEGIN CSTYLED */ 379 udpparam_t udp_param_arr[] = { 380 /*min max value name */ 381 { 0L, 256, 32, "udp_wroff_extra" }, 382 { 1L, 255, 255, "udp_ipv4_ttl" }, 383 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 384 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 385 { 0, 1, 1, "udp_do_checksum" }, 386 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 387 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 388 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 389 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 390 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 391 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 392 { 100, 60000, 1000, "udp_ndd_get_info_interval"}, 393 }; 394 /* END CSTYLED */ 395 396 /* Setable in /etc/system */ 397 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 398 uint32_t udp_random_anon_port = 1; 399 400 /* 401 * Hook functions to enable cluster networking. 
402 * On non-clustered systems these vectors must always be NULL 403 */ 404 405 void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family, 406 uint8_t *laddrp, in_port_t lport) = NULL; 407 void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, 408 uint8_t *laddrp, in_port_t lport) = NULL; 409 410 typedef union T_primitives *t_primp_t; 411 412 #define UDP_ENQUEUE_MP(udp, mp, proc, tag) { \ 413 ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL); \ 414 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 415 (mp)->b_queue = (queue_t *)((uintptr_t)tag); \ 416 (mp)->b_prev = (mblk_t *)proc; \ 417 if ((udp)->udp_mphead == NULL) \ 418 (udp)->udp_mphead = (mp); \ 419 else \ 420 (udp)->udp_mptail->b_next = (mp); \ 421 (udp)->udp_mptail = (mp); \ 422 (udp)->udp_mpcount++; \ 423 } 424 425 #define UDP_READERS_INCREF(udp) { \ 426 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 427 (udp)->udp_reader_count++; \ 428 } 429 430 #define UDP_READERS_DECREF(udp) { \ 431 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 432 (udp)->udp_reader_count--; \ 433 if ((udp)->udp_reader_count == 0) \ 434 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 435 } 436 437 #define UDP_SQUEUE_DECREF(udp) { \ 438 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 439 (udp)->udp_squeue_count--; \ 440 if ((udp)->udp_squeue_count == 0) \ 441 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 442 } 443 444 /* 445 * Notes on UDP endpoint synchronization: 446 * 447 * UDP needs exclusive operation on a per endpoint basis, when executing 448 * functions that modify the endpoint state. udp_rput_other() deals with 449 * packets with IP options, and processing these packets end up having 450 * to update the endpoint's option related state. udp_wput_other() deals 451 * with control operations from the top, e.g. connect() that needs to 452 * update the endpoint state. These could be synchronized using locks, 453 * but the current version uses squeues for this purpose. 
* squeues may
 * give performance improvement for certain cases such as connected UDP
 * sockets; thus the framework allows for using squeues.
 *
 * The perimeter routines are described as follows:
 *
 * udp_enter():
 *	Enter the UDP endpoint perimeter.
 *
 * udp_become_writer():
 *	Become exclusive on the UDP endpoint.  Specifies a function
 *	that will be called exclusively either immediately or later
 *	when the perimeter is available exclusively.
 *
 * udp_exit():
 *	Exit the UDP perimeter.
 *
 * Entering UDP from the top or from the bottom must be done using
 * udp_enter().  No lock must be held while attempting to enter the UDP
 * perimeter.  When finished, udp_exit() must be called to get out of
 * the perimeter.
 *
 * UDP operates in either MT_HOT mode or in SQUEUE mode.  In MT_HOT mode,
 * multiple threads may enter a UDP endpoint concurrently.  This is used
 * for sending and/or receiving normal data.  Control operations and other
 * special cases call udp_become_writer() to become exclusive on a per
 * endpoint basis and this results in transitioning to SQUEUE mode.  squeue
 * by definition serializes access to the conn_t.  When there are no more
 * pending messages on the squeue for the UDP connection, the endpoint
 * reverts to MT_HOT mode.  During the interregnum when not all MT threads
 * of an endpoint have finished, messages are queued in the UDP endpoint
 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode.
 *
 * These modes have the following analogs:
 *
 *	UDP_MT_HOT/udp_reader_count==0          none
 *	UDP_MT_HOT/udp_reader_count>0           RW_READ_LOCK
 *	UDP_MT_QUEUED                           RW_WRITE_WANTED
 *	UDP_SQUEUE or UDP_QUEUED_SQUEUE         RW_WRITE_LOCKED
 *
 * Stable modes:	UDP_MT_HOT, UDP_SQUEUE
 * Transient modes:	UDP_MT_QUEUED, UDP_QUEUED_SQUEUE
 *
 * While in stable modes, UDP keeps track of the number of threads
 * operating on the endpoint.
The udp_reader_count variable represents 498 * the number of threads entering the endpoint as readers while it is 499 * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there 500 * is only a single reader, i.e. when this counter drops to 1. Likewise, 501 * udp_squeue_count represents the number of threads operating on the 502 * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition 503 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e. 504 * when this counter drops to 0. 505 * 506 * The default mode is set to UDP_MT_HOT and UDP alternates between 507 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below. 508 * 509 * Mode transition: 510 * ---------------------------------------------------------------- 511 * old mode Event New mode 512 * ---------------------------------------------------------------- 513 * UDP_MT_HOT Call to udp_become_writer() UDP_SQUEUE 514 * and udp_reader_count == 1 515 * 516 * UDP_MT_HOT Call to udp_become_writer() UDP_MT_QUEUED 517 * and udp_reader_count > 1 518 * 519 * UDP_MT_QUEUED udp_reader_count drops to zero UDP_QUEUED_SQUEUE 520 * 521 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_SQUEUE 522 * internal UDP queue successfully 523 * moved to squeue AND udp_squeue_count != 0 524 * 525 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_MT_HOT 526 * internal UDP queue successfully 527 * moved to squeue AND udp_squeue_count 528 * drops to zero 529 * 530 * UDP_SQUEUE udp_squeue_count drops to zero UDP_MT_HOT 531 * ---------------------------------------------------------------- 532 */ 533 534 static queue_t * 535 UDP_WR(queue_t *q) 536 { 537 ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL); 538 ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL); 539 ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next))); 540 541 return (_WR(q)->q_next); 542 } 543 544 static queue_t * 545 UDP_RD(queue_t *q) 546 { 547 ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL); 548 
ASSERT(IPCL_IS_UDP(Q_TO_CONN(q))); 549 ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL); 550 551 return (_RD(q)->q_next); 552 } 553 554 #ifdef DEBUG 555 #define UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller) 556 #else 557 #define UDP_MODE_ASSERTIONS(udp, caller) 558 #endif 559 560 /* Invariants */ 561 #ifdef DEBUG 562 563 uint32_t udp_count[4]; 564 565 /* Context of udp_mode_assertions */ 566 #define UDP_ENTER 1 567 #define UDP_BECOME_WRITER 2 568 #define UDP_EXIT 3 569 570 static void 571 udp_mode_assertions(udp_t *udp, int caller) 572 { 573 ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock)); 574 575 switch (udp->udp_mode) { 576 case UDP_MT_HOT: 577 /* 578 * Messages have not yet been enqueued on the internal queue, 579 * otherwise we would have switched to UDP_MT_QUEUED. Likewise 580 * by definition, there can't be any messages enqueued on the 581 * squeue. The UDP could be quiescent, so udp_reader_count 582 * could be zero at entry. 583 */ 584 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 && 585 udp->udp_squeue_count == 0); 586 ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0); 587 udp_count[0]++; 588 break; 589 590 case UDP_MT_QUEUED: 591 /* 592 * The last MT thread to exit the udp perimeter empties the 593 * internal queue and then switches the UDP to 594 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED 595 * mode, it means there must be at least 1 MT thread still in 596 * the perimeter and at least 1 message on the internal queue. 597 */ 598 ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL && 599 udp->udp_mpcount != 0 && udp->udp_squeue_count == 0); 600 udp_count[1]++; 601 break; 602 603 case UDP_QUEUED_SQUEUE: 604 /* 605 * The switch has happened from MT to SQUEUE. So there can't 606 * any MT threads. Messages could still pile up on the internal 607 * queue until the transition is complete and we move to 608 * UDP_SQUEUE mode. 
We can't assert on nonzero udp_squeue_count 609 * since the squeue could drain any time. 610 */ 611 ASSERT(udp->udp_reader_count == 0); 612 udp_count[2]++; 613 break; 614 615 case UDP_SQUEUE: 616 /* 617 * The transition is complete. Thre can't be any messages on 618 * the internal queue. The udp could be quiescent or the squeue 619 * could drain any time, so we can't assert on nonzero 620 * udp_squeue_count during entry. Nor can we assert that 621 * udp_reader_count is zero, since, a reader thread could have 622 * directly become writer in line by calling udp_become_writer 623 * without going through the queued states. 624 */ 625 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0); 626 ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0); 627 udp_count[3]++; 628 break; 629 } 630 } 631 #endif 632 633 #define _UDP_ENTER(connp, mp, proc, tag) { \ 634 udp_t *_udp = (connp)->conn_udp; \ 635 \ 636 mutex_enter(&(connp)->conn_lock); \ 637 if ((connp)->conn_state_flags & CONN_CLOSING) { \ 638 mutex_exit(&(connp)->conn_lock); \ 639 freemsg(mp); \ 640 } else { \ 641 UDP_MODE_ASSERTIONS(_udp, UDP_ENTER); \ 642 \ 643 switch (_udp->udp_mode) { \ 644 case UDP_MT_HOT: \ 645 /* We can execute as reader right away. */ \ 646 UDP_READERS_INCREF(_udp); \ 647 mutex_exit(&(connp)->conn_lock); \ 648 (*(proc))(connp, mp, (connp)->conn_sqp); \ 649 break; \ 650 \ 651 case UDP_SQUEUE: \ 652 /* \ 653 * We are in squeue mode, send the \ 654 * packet to the squeue \ 655 */ \ 656 _udp->udp_squeue_count++; \ 657 CONN_INC_REF_LOCKED(connp); \ 658 mutex_exit(&(connp)->conn_lock); \ 659 squeue_enter((connp)->conn_sqp, mp, proc, \ 660 connp, tag); \ 661 break; \ 662 \ 663 case UDP_MT_QUEUED: \ 664 case UDP_QUEUED_SQUEUE: \ 665 /* \ 666 * Some messages may have been enqueued \ 667 * ahead of us. Enqueue the new message \ 668 * at the tail of the internal queue to \ 669 * preserve message ordering. 
\ 670 */ \ 671 UDP_ENQUEUE_MP(_udp, mp, proc, tag); \ 672 mutex_exit(&(connp)->conn_lock); \ 673 break; \ 674 } \ 675 } \ 676 } 677 678 static void 679 udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 680 { 681 _UDP_ENTER(connp, mp, proc, tag); 682 } 683 684 static void 685 udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 686 { 687 udp_t *udp; 688 689 udp = connp->conn_udp; 690 691 mutex_enter(&connp->conn_lock); 692 693 UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER); 694 695 switch (udp->udp_mode) { 696 case UDP_MT_HOT: 697 if (udp->udp_reader_count == 1) { 698 /* 699 * We are the only MT thread. Switch to squeue mode 700 * immediately. 701 */ 702 udp->udp_mode = UDP_SQUEUE; 703 udp->udp_squeue_count = 1; 704 CONN_INC_REF_LOCKED(connp); 705 mutex_exit(&connp->conn_lock); 706 squeue_enter(connp->conn_sqp, mp, proc, connp, tag); 707 return; 708 } 709 /* FALLTHRU */ 710 711 case UDP_MT_QUEUED: 712 /* Enqueue the packet internally in UDP */ 713 udp->udp_mode = UDP_MT_QUEUED; 714 UDP_ENQUEUE_MP(udp, mp, proc, tag); 715 mutex_exit(&connp->conn_lock); 716 return; 717 718 case UDP_SQUEUE: 719 case UDP_QUEUED_SQUEUE: 720 /* 721 * We are already exclusive. i.e. we are already 722 * writer. Simply call the desired function. 723 */ 724 udp->udp_squeue_count++; 725 mutex_exit(&connp->conn_lock); 726 (*proc)(connp, mp, connp->conn_sqp); 727 return; 728 } 729 } 730 731 /* 732 * Transition from MT mode to SQUEUE mode, when the last MT thread 733 * is exiting the UDP perimeter. Move all messages from the internal 734 * udp queue to the squeue. 
A better way would be to move all the 735 * messages in one shot, this needs more support from the squeue framework 736 */ 737 static void 738 udp_switch_to_squeue(udp_t *udp) 739 { 740 mblk_t *mp; 741 mblk_t *mp_next; 742 sqproc_t proc; 743 uint8_t tag; 744 conn_t *connp = udp->udp_connp; 745 746 ASSERT(MUTEX_HELD(&connp->conn_lock)); 747 ASSERT(udp->udp_mode == UDP_MT_QUEUED); 748 while (udp->udp_mphead != NULL) { 749 mp = udp->udp_mphead; 750 udp->udp_mphead = NULL; 751 udp->udp_mptail = NULL; 752 udp->udp_mpcount = 0; 753 udp->udp_mode = UDP_QUEUED_SQUEUE; 754 mutex_exit(&connp->conn_lock); 755 /* 756 * It is best not to hold any locks across the calls 757 * to squeue functions. Since we drop the lock we 758 * need to go back and check the udp_mphead once again 759 * after the squeue_fill and hence the while loop at 760 * the top of this function 761 */ 762 for (; mp != NULL; mp = mp_next) { 763 mp_next = mp->b_next; 764 proc = (sqproc_t)mp->b_prev; 765 tag = (uint8_t)((uintptr_t)mp->b_queue); 766 mp->b_next = NULL; 767 mp->b_prev = NULL; 768 mp->b_queue = NULL; 769 CONN_INC_REF(connp); 770 udp->udp_squeue_count++; 771 squeue_fill(connp->conn_sqp, mp, proc, connp, 772 tag); 773 } 774 mutex_enter(&connp->conn_lock); 775 } 776 /* 777 * udp_squeue_count of zero implies that the squeue has drained 778 * even before we arrived here (i.e. after the squeue_fill above) 779 */ 780 udp->udp_mode = (udp->udp_squeue_count != 0) ? 
781 UDP_SQUEUE : UDP_MT_HOT; 782 } 783 784 #define _UDP_EXIT(connp) { \ 785 udp_t *_udp = (connp)->conn_udp; \ 786 \ 787 mutex_enter(&(connp)->conn_lock); \ 788 UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \ 789 \ 790 switch (_udp->udp_mode) { \ 791 case UDP_MT_HOT: \ 792 UDP_READERS_DECREF(_udp); \ 793 mutex_exit(&(connp)->conn_lock); \ 794 break; \ 795 \ 796 case UDP_SQUEUE: \ 797 UDP_SQUEUE_DECREF(_udp); \ 798 if (_udp->udp_squeue_count == 0) \ 799 _udp->udp_mode = UDP_MT_HOT; \ 800 mutex_exit(&(connp)->conn_lock); \ 801 break; \ 802 \ 803 case UDP_MT_QUEUED: \ 804 /* \ 805 * If this is the last MT thread, we need to \ 806 * switch to squeue mode \ 807 */ \ 808 UDP_READERS_DECREF(_udp); \ 809 if (_udp->udp_reader_count == 0) \ 810 udp_switch_to_squeue(_udp); \ 811 mutex_exit(&(connp)->conn_lock); \ 812 break; \ 813 \ 814 case UDP_QUEUED_SQUEUE: \ 815 UDP_SQUEUE_DECREF(_udp); \ 816 /* \ 817 * Even if the udp_squeue_count drops to zero, we \ 818 * don't want to change udp_mode to UDP_MT_HOT here. \ 819 * The thread in udp_switch_to_squeue will take care \ 820 * of the transition to UDP_MT_HOT, after emptying \ 821 * any more new messages that have been enqueued in \ 822 * udp_mphead. \ 823 */ \ 824 mutex_exit(&(connp)->conn_lock); \ 825 break; \ 826 } \ 827 } 828 829 static void 830 udp_exit(conn_t *connp) 831 { 832 _UDP_EXIT(connp); 833 } 834 835 /* 836 * Return the next anonymous port in the privileged port range for 837 * bind checking. 838 * 839 * Trusted Extension (TX) notes: TX allows administrator to mark or 840 * reserve ports as Multilevel ports (MLP). MLP has special function 841 * on TX systems. Once a port is made MLP, it's not available as 842 * ordinary port. This creates "holes" in the port name space. It 843 * may be necessary to skip the "holes" find a suitable anon port. 
844 */ 845 static in_port_t 846 udp_get_next_priv_port(udp_t *udp) 847 { 848 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 849 in_port_t nextport; 850 boolean_t restart = B_FALSE; 851 udp_stack_t *us = udp->udp_us; 852 853 retry: 854 if (next_priv_port < us->us_min_anonpriv_port || 855 next_priv_port >= IPPORT_RESERVED) { 856 next_priv_port = IPPORT_RESERVED - 1; 857 if (restart) 858 return (0); 859 restart = B_TRUE; 860 } 861 862 if (is_system_labeled() && 863 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 864 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 865 next_priv_port = nextport; 866 goto retry; 867 } 868 869 return (next_priv_port--); 870 } 871 872 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 873 /* ARGSUSED */ 874 static int 875 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 876 { 877 udp_fanout_t *udpf; 878 int i; 879 zoneid_t zoneid; 880 conn_t *connp; 881 udp_t *udp; 882 udp_stack_t *us; 883 884 connp = Q_TO_CONN(q); 885 udp = connp->conn_udp; 886 us = udp->udp_us; 887 888 /* Refer to comments in udp_status_report(). */ 889 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 890 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 891 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 892 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 893 return (0); 894 } 895 } 896 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 897 /* The following may work even if we cannot get a large buf. */ 898 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 899 return (0); 900 } 901 902 (void) mi_mpprintf(mp, 903 "UDP " MI_COL_HDRPAD_STR 904 /* 12345678[89ABCDEF] */ 905 " zone lport src addr dest addr port state"); 906 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 907 908 zoneid = connp->conn_zoneid; 909 910 for (i = 0; i < us->us_bind_fanout_size; i++) { 911 udpf = &us->us_bind_fanout[i]; 912 mutex_enter(&udpf->uf_lock); 913 914 /* Print the hash index. 
*/ 915 udp = udpf->uf_udp; 916 if (zoneid != GLOBAL_ZONEID) { 917 /* skip to first entry in this zone; might be none */ 918 while (udp != NULL && 919 udp->udp_connp->conn_zoneid != zoneid) 920 udp = udp->udp_bind_hash; 921 } 922 if (udp != NULL) { 923 uint_t print_len, buf_len; 924 925 buf_len = mp->b_cont->b_datap->db_lim - 926 mp->b_cont->b_wptr; 927 print_len = snprintf((char *)mp->b_cont->b_wptr, 928 buf_len, "%d\n", i); 929 if (print_len < buf_len) { 930 mp->b_cont->b_wptr += print_len; 931 } else { 932 mp->b_cont->b_wptr += buf_len; 933 } 934 for (; udp != NULL; udp = udp->udp_bind_hash) { 935 if (zoneid == GLOBAL_ZONEID || 936 zoneid == udp->udp_connp->conn_zoneid) 937 udp_report_item(mp->b_cont, udp); 938 } 939 } 940 mutex_exit(&udpf->uf_lock); 941 } 942 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 943 return (0); 944 } 945 946 /* 947 * Hash list removal routine for udp_t structures. 948 */ 949 static void 950 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 951 { 952 udp_t *udpnext; 953 kmutex_t *lockp; 954 udp_stack_t *us = udp->udp_us; 955 956 if (udp->udp_ptpbhn == NULL) 957 return; 958 959 /* 960 * Extract the lock pointer in case there are concurrent 961 * hash_remove's for this instance. 
962 */ 963 ASSERT(udp->udp_port != 0); 964 if (!caller_holds_lock) { 965 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 966 us->us_bind_fanout_size)].uf_lock; 967 ASSERT(lockp != NULL); 968 mutex_enter(lockp); 969 } 970 if (udp->udp_ptpbhn != NULL) { 971 udpnext = udp->udp_bind_hash; 972 if (udpnext != NULL) { 973 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 974 udp->udp_bind_hash = NULL; 975 } 976 *udp->udp_ptpbhn = udpnext; 977 udp->udp_ptpbhn = NULL; 978 } 979 if (!caller_holds_lock) { 980 mutex_exit(lockp); 981 } 982 } 983 984 static void 985 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 986 { 987 udp_t **udpp; 988 udp_t *udpnext; 989 990 ASSERT(MUTEX_HELD(&uf->uf_lock)); 991 if (udp->udp_ptpbhn != NULL) { 992 udp_bind_hash_remove(udp, B_TRUE); 993 } 994 udpp = &uf->uf_udp; 995 udpnext = udpp[0]; 996 if (udpnext != NULL) { 997 /* 998 * If the new udp bound to the INADDR_ANY address 999 * and the first one in the list is not bound to 1000 * INADDR_ANY we skip all entries until we find the 1001 * first one bound to INADDR_ANY. 1002 * This makes sure that applications binding to a 1003 * specific address get preference over those binding to 1004 * INADDR_ANY. 1005 */ 1006 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 1007 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 1008 while ((udpnext = udpp[0]) != NULL && 1009 !V6_OR_V4_INADDR_ANY( 1010 udpnext->udp_bound_v6src)) { 1011 udpp = &(udpnext->udp_bind_hash); 1012 } 1013 if (udpnext != NULL) 1014 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1015 } else { 1016 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1017 } 1018 } 1019 udp->udp_bind_hash = udpnext; 1020 udp->udp_ptpbhn = udpp; 1021 udpp[0] = udp; 1022 } 1023 1024 /* 1025 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 1026 * passed to udp_wput. 1027 * It associates a port number and local address with the stream. 
* The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the us->us_next_port_to_try.
 *
 * Outline of the code below:
 *   1. Validate the request length and that the endpoint is TS_UNBND.
 *   2. Grow the message so it can hold a T_bind_ack, a sin6_t and the
 *	one-byte protocol number.
 *   3. Decode the address (none / sin_t / sin6_t) and pick the port: an
 *	anonymous one when the request asks for port 0, otherwise the
 *	requested one after a privilege check for privileged ports.
 *   4. Search the bind fanout bucket for a conflicting endpoint, trying
 *	further ports unless the request must get exactly the port it named.
 *   5. Record address and port in the udp_t, insert it into the bind hash
 *	and mark it TS_IDLE.
 *   6. Tell the cluster hook (if any), run the multilevel-port checks on
 *	labeled systems, then hand the request to ip_bind_v4()/ip_bind_v6().
 *
 * Every failure is reported upstream through udp_err_ack().
 *
 * NOTE(review): the failure returns that come after step 5 (the labeled
 * system checks and the allocb() failure for the IRE request) do not
 * remove the endpoint from the bind hash or reset udp_state/udp_port in
 * this function, and udp_err_ack() only sends the error upstream.  Confirm
 * whether something else undoes the binding on those paths.
 */
static void
udp_bind(queue_t *q, mblk_t *mp)
{
	sin_t		*sin;
	sin6_t		*sin6;
	mblk_t		*mp1;
	in_port_t	port;		/* Host byte order */
	in_port_t	requested_port;	/* Host byte order */
	struct T_bind_req *tbr;
	int		count;
	in6_addr_t	v6src;
	boolean_t	bind_to_req_port_only;
	int		loopmax;
	udp_fanout_t	*udpf;
	in_port_t	lport;		/* Network byte order */
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;
	boolean_t	is_inaddr_any;
	mlp_type_t	addrtype, mlptype;
	udp_stack_t	*us;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	us = udp->udp_us;
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	if (udp->udp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Reallocate the message to make sure we have enough room for an
	 * address and the protocol type.
	 */
	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
	if (!mp1) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	mp = mp1;
	tbr = (struct T_bind_req *)mp->b_rptr;
	/* The address length alone decides how the address is interpreted. */
	switch (tbr->ADDR_length) {
	case 0:			/* Request for a generic port */
		/* Synthesize an all-zero address of the endpoint's family. */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (udp->udp_family == AF_INET) {
			tbr->ADDR_length = sizeof (sin_t);
			sin = (sin_t *)&tbr[1];
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
		} else {
			ASSERT(udp->udp_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&tbr[1];
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
		}
		port = 0;
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin->sin_port);
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin6->sin6_port);
		break;

	default:		/* Invalid request */
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	requested_port = port;

	/*
	 * Only a T_BIND_REQ naming a non-zero port must get exactly that
	 * port; O_T_BIND_REQ and port 0 allow another port to be chosen.
	 */
	if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ)
		bind_to_req_port_only = B_FALSE;
	else			/* T_BIND_REQ and requested_port != 0 */
		bind_to_req_port_only = B_TRUE;

	if (requested_port == 0) {
		/*
		 * If the application passed in zero for the port number, it
		 * doesn't care which port number we bind to. Get one in the
		 * valid range.
		 */
		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			port = udp_update_next_port(udp,
			    us->us_next_port_to_try, B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < us->us_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			/* Ports listed in us_epriv_ports are privileged too */
			for (i = 0; i < us->us_num_epriv_ports; i++) {
				if (port == us->us_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);

			if (secpolicy_net_privaddr(cr, port) != 0) {
				udp_err_ack(q, mp, TACCES, 0);
				return;
			}
		}
	}

	/* Port 0 here means the anonymous port allocators found nothing. */
	if (port == 0) {
		udp_err_ack(q, mp, TNOADDR, 0);
		return;
	}

	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it.
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		/* An AF_INET6 endpoint bound to a v4-mapped address is IPv4 */
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream.  If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set the bind fails; otherwise we
	 * search for an unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different than in SunOS 4.X which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/*
		 * loopmax = (IPPORT_RESERVED-1) -
		 *    us->us_min_anonpriv_port + 1
		 */
		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
	} else {
		loopmax = us->us_largest_anon_port -
		    us->us_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
	zoneid = connp->conn_zoneid;

	/*
	 * Each pass examines one candidate port.  The loop is left through
	 * "break" with udpf->uf_lock still held (the port is usable), or
	 * through "return" after the lock has been dropped (failure).
	 */
	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;

		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
		    us->us_bind_fanout_size)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (zoneid != udp1->udp_connp->conn_zoneid &&
			    !udp->udp_mac_exempt && !udp1->udp_mac_exempt)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * is changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind ||
			    udp1->udp_mac_exempt || udp->udp_mac_exempt) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion) {

				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a
				 * particular port number, then ignore any
				 * bindings of the other protocol that may
				 * conflict. This allows the user to bind IPv6
				 * alone and get both v4 and v6, or bind
				 * both and get each separately. On subsequent
				 * times through the loop, we're checking a
				 * port that we chose (not the user) and thus
				 * we do not allow casual duplicate bindings.
				 */
				if (count == 0 && requested_port != 0)
					continue;
			}

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct different IP addresses
			 * (non-wildcard, also), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			/* udp1 conflicts with the requested binding. */
			break;
		}

		/*
		 * SO_REUSEADDR on an explicitly requested port overrides
		 * any conflict other than an exclusive-bind one.
		 */
		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when requested port
			 * is bound (and only first of the for()
			 * loop iteration).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 *
			 */
			udp_err_ack(q, mp, TADDRBUSY, 0);
			return;
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update us->us_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    us->us_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp_err_ack(q, mp, TNOADDR, 0);
			return;
		}
	}

	/* udpf->uf_lock is held from here until the hash insert is done. */

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address udp_rput
	 * will clear the source address when it receives the T_BIND_ACK.
	 */
	udp->udp_v6src = udp->udp_bound_v6src = v6src;
	udp->udp_port = lport;
	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
		us->us_next_port_to_try = port + 1;
	}

	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
	if (udp->udp_family == AF_INET) {
		sin->sin_port = udp->udp_port;
	} else {
		int error;

		sin6->sin6_port = udp->udp_port;
		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			/*
			 * NOTE(review): udp_v6src, udp_bound_v6src and
			 * udp_port were already overwritten above and are
			 * not restored here; the endpoint has not been
			 * hashed and udp_state is still unchanged.
			 */
			mutex_exit(&udpf->uf_lock);
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	udp->udp_state = TS_IDLE;
	udp_bind_hash_insert(udpf, udp);
	mutex_exit(&udpf->uf_lock);

	if (cl_inet_bind) {
		/*
		 * Running in cluster mode - register bind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}

	}

	/*
	 * Labeled (Trusted Extensions) systems only: work out whether the
	 * chosen port is a multilevel port and whether this endpoint may
	 * bind to it.  Each failure below resets conn_anon_port and
	 * conn_mlp_type before returning (see the NOTE(review) in the
	 * function header about the binding itself).
	 */
	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
	if (is_system_labeled() && (!connp->conn_anon_port ||
	    connp->conn_anon_mlp)) {
		uint16_t mlpport;
		cred_t *cr = connp->conn_cred;
		zone_t *zone;

		zone = crgetzone(cr);
		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
		    mlptSingle;
		addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
		    &v6src, udp->udp_us->us_netstack->netstack_ip);
		if (addrtype == mlptSingle) {
			udp_err_ack(q, mp, TNOADDR, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}
		/* Anonymous binds are looked up under PMAPPORT instead. */
		mlpport = connp->conn_anon_port ? PMAPPORT : port;
		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
		    addrtype);
		if (mlptype != mlptSingle &&
		    (connp->conn_mlp_type == mlptSingle ||
		    secpolicy_net_bindmlp(cr) != 0)) {
			if (udp->udp_debug) {
				(void) strlog(UDP_MOD_ID, 0, 1,
				    SL_ERROR|SL_TRACE,
				    "udp_bind: no priv for multilevel port %d",
				    mlpport);
			}
			udp_err_ack(q, mp, TACCES, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}

		/*
		 * If we're specifically binding a shared IP address and the
		 * port is MLP on shared addresses, then check to see if this
		 * zone actually owns the MLP.  Reject if not.
		 */
		if (mlptype == mlptShared && addrtype == mlptShared) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid_t mlpzone;

			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
			    htons(mlpport));
			if (connp->conn_zoneid != mlpzone) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: attempt to bind port "
					    "%d on shared addr in zone %d "
					    "(should be %d)",
					    mlpport, connp->conn_zoneid,
					    mlpzone);
				}
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		if (connp->conn_anon_port) {
			int error;

			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
			    port, B_TRUE);
			if (error != 0) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: cannot establish anon "
					    "MLP for port %d", port);
				}
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		connp->conn_mlp_type = mlptype;
	}

	/* Pass the protocol number in the message following the address. */
	*mp->b_wptr++ = IPPROTO_UDP;
	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
		/*
		 * Append a request for an IRE if udp_v6src not
		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
		 */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			udp_err_ack(q, mp, TSYSERR, ENOMEM);
			return;
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
	}
	if (udp->udp_family == AF_INET6)
		mp = ip_bind_v6(q, mp, connp, NULL);
	else
		mp = ip_bind_v4(q, mp, connp);

	/*
	 * A non-NULL reply is processed right away.  For a NULL return an
	 * extra conn reference is taken -- presumably IP has deferred the
	 * bind and will finish it through udp_resume_bind(); confirm.
	 */
	if (mp != NULL)
		udp_rput_other(_RD(q), mp);
	else
		CONN_INC_REF(connp);
}


/*
 * Entry point for resuming a bind: re-enter the UDP perimeter via
 * udp_enter() so that udp_resume_bind_cb() processes mp.
 */
void
udp_resume_bind(conn_t *connp, mblk_t *mp)
{
	udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY);
}

/*
 * This is called from ip_wput_nondata to resume a deferred UDP bind.
 *
 * arg is the conn_t; mp is passed to udp_rput_other() on the conn's read
 * queue, after which the pending operation is marked done and the UDP
 * perimeter is exited.
 */
/* ARGSUSED */
static void
udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2)
{
	conn_t *connp = arg;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));

	udp_rput_other(connp->conn_rq, mp);

	CONN_OPER_PENDING_DONE(connp);
	udp_exit(connp);
}

/*
 * This routine handles each T_CONN_REQ message passed to udp.  It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_rput.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
*/
/*
 * Implementation notes for udp_connect():
 *   - q is the write queue the T_CONN_REQ arrived on, mp the request.
 *   - The endpoint must already be bound.  If it is already connected
 *     (TS_DATA_XFER) the old association is dropped first.
 *   - A zero destination address is replaced by loopback, and the request
 *     is patched accordingly because it is reused for the T_CONN_CON.
 *   - The endpoint is moved to TS_DATA_XFER under the bind fanout lock
 *     after checking that no other connected endpoint in the bucket has the
 *     same port, IP version, source, destination and zone.  The allocation
 *     failures that follow put it back to TS_IDLE (label bind_failed).
 *
 * NOTE(review): the TBADOPT/TBADADDR/TSYSERR returns that come after the
 * "already connected" reset leave a previously connected endpoint in
 * TS_IDLE; confirm that this is the intended outcome of a failed
 * re-connect.
 */
static void
udp_connect(queue_t *q, mblk_t *mp)
{
	sin6_t	*sin6;
	sin_t	*sin;
	struct T_conn_req	*tcr;
	in6_addr_t v6dst;
	ipaddr_t v4dst;
	uint16_t dstport;
	uint32_t flowinfo;
	mblk_t	*mp1, *mp2;
	udp_fanout_t	*udpf;
	udp_t	*udp, *udp1;
	udp_stack_t	*us;

	udp = Q_TO_UDP(q);

	/* Only a cast here; tcr is not dereferenced before the length check */
	tcr = (struct T_conn_req *)mp->b_rptr;
	us = udp->udp_us;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	/*
	 * This UDP must have bound to a port already before doing
	 * a connect.
	 */
	if (udp->udp_state == TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);

	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
	    us->us_bind_fanout_size)];

	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		mutex_enter(&udpf->uf_lock);
		udp->udp_v6src = udp->udp_bound_v6src;
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
	}

	/* Options are not accepted on a T_CONN_REQ. */
	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine packet type based on type of address passed in
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that address family matches the type of
	 * family of the address passed down
	 */
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v6dst = sin6->sin6_addr;
		/* A v4-mapped destination switches the endpoint to IPv4. */
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			flowinfo = 0;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
			flowinfo = sin6->sin6_flowinfo;
		}
		dstport = sin6->sin6_port;
		break;
	}
	if (dstport == 0) {
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	/*
	 * Create a default IP header with no IP options.
	 */
	udp->udp_dstport = dstport;
	if (udp->udp_ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (udp->udp_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = 0;

		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * use the address of that interface as our
		 * source address if no source address has been set.
		 */
		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
		    CLASSD(v4dst) &&
		    udp->udp_multicast_if_addr != INADDR_ANY) {
			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
			    &udp->udp_v6src);
		}
	} else {
		ASSERT(udp->udp_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = flowinfo;
		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * then the ip bind logic will pick the correct source
		 * address (i.e. matching the outgoing multicast interface).
		 */
	}

	/*
	 * Verify that the src/port/dst/port and zoneid is unique for all
	 * connections in TS_DATA_XFER
	 */
	mutex_enter(&udpf->uf_lock);
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;
		if (udp->udp_port != udp1->udp_port ||
		    udp->udp_ipversion != udp1->udp_ipversion ||
		    dstport != udp1->udp_dstport ||
		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) ||
		    udp->udp_connp->conn_zoneid != udp1->udp_connp->conn_zoneid)
			continue;
		/* Exact duplicate of an existing connection: refuse. */
		mutex_exit(&udpf->uf_lock);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}
	udp->udp_state = TS_DATA_XFER;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to verify that there is a route
	 * and to determine the source address.
	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		/*
		 * Shared failure exit: the two later allocation failures
		 * jump here (into this block) after sending their own
		 * error ack, to put the endpoint back into TS_IDLE.
		 */
bind_failed:
		mutex_enter(&udpf->uf_lock);
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
		return;
	}

	/*
	 * We also have to send a connection confirmation to
	 * keep TLI happy. Prepare it for udp_rput.
	 */
	if (udp->udp_family == AF_INET)
		mp2 = mi_tpi_conn_con(NULL, (char *)sin,
		    sizeof (*sin), NULL, 0);
	else
		mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
		    sizeof (*sin6), NULL, 0);
	if (mp2 == NULL) {
		freemsg(mp1);
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack. */
		freemsg(mp2);
		/* mp1 is recycled to carry the error ack instead. */
		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
	linkb(mp1, mp);
	linkb(mp1, mp2);

	mblk_setcred(mp1, udp->udp_connp->conn_cred);
	if (udp->udp_family == AF_INET)
		mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
	else
		mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);

	/*
	 * A non-NULL reply is processed right away.  For a NULL return an
	 * extra conn reference is taken -- presumably the bind was deferred
	 * and is completed later; confirm.
	 */
	if (mp1 != NULL)
		udp_rput_other(_RD(q), mp1);
	else
		CONN_INC_REF(udp->udp_connp);
}

/*
 * Close routine for a UDP stream; q is the queue handed to close, and the
 * conn_t is found through UDP_WR(q).
 *
 * Quiesces the conn, drains the receive side, turns off queue processing,
 * restores the IP queues' water marks from their module info, frees the
 * udp_t and finally strips the UDP identity and the closing/condemned/
 * quiesced state flags from the conn_t.  Always returns 0.
 */
static int
udp_close(queue_t *q)
{
	conn_t	*connp = Q_TO_CONN(UDP_WR(q));
	udp_t	*udp;
	queue_t	*ip_rq = RD(UDP_WR(q));

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	ip_quiesce_conn(connp);
	/*
	 * Disable read-side synchronous stream
	 * interface and drain any queued data.
	 */
	udp_rcv_drain(q, udp, B_TRUE);
	ASSERT(!udp->udp_direct_sockfs);

	qprocsoff(q);

	/* restore IP module's high and low water marks to default values */
	ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat;

	/* The drain above must have left the receive list empty. */
	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	ASSERT(udp->udp_rcv_list_tail == NULL);

	/* udp must not be used after this; connp->conn_udp becomes NULL. */
	udp_close_free(connp);

	/*
	 * Restore connp as an IP endpoint.
	 * Locking required to prevent a race with udp_snmp_get()/
	 * ipcl_get_next_conn(), which selects conn_t which are
	 * IPCL_UDP and not CONN_CONDEMNED.
	 */
	mutex_enter(&connp->conn_lock);
	connp->conn_flags &= ~IPCL_UDP;
	connp->conn_state_flags &=
	    ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED);
	connp->conn_ulp_labeled = B_FALSE;
	mutex_exit(&connp->conn_lock);

	return (0);
}

/*
 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn
 *
 * Notifies the cluster unbind hook (if one is registered and the endpoint
 * is TS_IDLE), removes the endpoint from the bind hash, then waits on
 * conn_cv until there are no readers, no squeue references and the mode is
 * UDP_MT_HOT.
 */
void
udp_quiesce_conn(conn_t *connp)
{
	udp_t	*udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)(&(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		}
	}

	/* B_FALSE: the bucket lock is not held here. */
	udp_bind_hash_remove(udp, B_FALSE);

	mutex_enter(&connp->conn_lock);
	while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 ||
	    udp->udp_mode != UDP_MT_HOT) {
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	}
	mutex_exit(&connp->conn_lock);
}

/*
 * Release everything hanging off connp->conn_udp and then the udp_t itself:
 * send/receive IP options, the sticky header buffer, the sticky ip6_pkt
 * state and the netstack hold.  connp->conn_udp is NULL on return.
 */
void
udp_close_free(conn_t *connp)
{
	udp_t *udp = connp->conn_udp;

	/* If there are any options associated with the stream, free them. */
	if (udp->udp_ip_snd_options) {
		mi_free((char *)udp->udp_ip_snd_options);
		udp->udp_ip_snd_options = NULL;
	}

	if (udp->udp_ip_rcv_options) {
		mi_free((char *)udp->udp_ip_rcv_options);
		udp->udp_ip_rcv_options = NULL;
	}

	/* Free memory associated with sticky options */
	if (udp->udp_sticky_hdrs_len != 0) {
		kmem_free(udp->udp_sticky_hdrs,
		    udp->udp_sticky_hdrs_len);
		udp->udp_sticky_hdrs = NULL;
		udp->udp_sticky_hdrs_len = 0;
	}

	ip6_pkt_free(&udp->udp_sticky_ipp);

	udp->udp_connp = NULL;
	netstack_rele(udp->udp_us->us_netstack);

	connp->conn_udp = NULL;
	kmem_cache_free(udp_cache, udp);
}

/*
 * This routine handles each T_DISCON_REQ message passed to udp
 * as an indication that UDP is no longer connected. This results
 * in sending a T_BIND_REQ to IP to restore the binding to just
 * the local address/port.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying just the local address/port
 *	T_OK_ACK	- for the T_DISCON_REQ
 *
 * The disconnect completes in udp_rput.
 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
2002 */ 2003 static void 2004 udp_disconnect(queue_t *q, mblk_t *mp) 2005 { 2006 udp_t *udp = Q_TO_UDP(q); 2007 mblk_t *mp1; 2008 udp_fanout_t *udpf; 2009 udp_stack_t *us; 2010 2011 us = udp->udp_us; 2012 if (udp->udp_state != TS_DATA_XFER) { 2013 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2014 "udp_disconnect: bad state, %u", udp->udp_state); 2015 udp_err_ack(q, mp, TOUTSTATE, 0); 2016 return; 2017 } 2018 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 2019 us->us_bind_fanout_size)]; 2020 mutex_enter(&udpf->uf_lock); 2021 udp->udp_v6src = udp->udp_bound_v6src; 2022 udp->udp_state = TS_IDLE; 2023 mutex_exit(&udpf->uf_lock); 2024 2025 /* 2026 * Send down bind to IP to remove the full binding and revert 2027 * to the local address binding. 2028 */ 2029 if (udp->udp_family == AF_INET) 2030 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 2031 else 2032 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 2033 if (mp1 == NULL) { 2034 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2035 return; 2036 } 2037 mp = mi_tpi_ok_ack_alloc(mp); 2038 if (mp == NULL) { 2039 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 2040 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 2041 return; 2042 } 2043 2044 if (udp->udp_family == AF_INET6) { 2045 int error; 2046 2047 /* Rebuild the header template */ 2048 error = udp_build_hdrs(q, udp); 2049 if (error != 0) { 2050 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 2051 freemsg(mp1); 2052 return; 2053 } 2054 } 2055 mutex_enter(&udpf->uf_lock); 2056 udp->udp_discon_pending = 1; 2057 mutex_exit(&udpf->uf_lock); 2058 2059 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 2060 linkb(mp1, mp); 2061 2062 if (udp->udp_family == AF_INET6) 2063 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 2064 else 2065 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 2066 2067 if (mp1 != NULL) 2068 udp_rput_other(_RD(q), mp1); 2069 else 2070 CONN_INC_REF(udp->udp_connp); 2071 } 2072 2073 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 2074 static void 2075 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 2076 { 2077 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 2078 putnext(UDP_RD(q), mp); 2079 } 2080 2081 /* Shorthand to generate and send TPI error acks to our client */ 2082 static void 2083 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2084 int sys_error) 2085 { 2086 struct T_error_ack *teackp; 2087 2088 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2089 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2090 teackp = (struct T_error_ack *)mp->b_rptr; 2091 teackp->ERROR_prim = primitive; 2092 teackp->TLI_error = t_error; 2093 teackp->UNIX_error = sys_error; 2094 putnext(UDP_RD(q), mp); 2095 } 2096 } 2097 2098 /*ARGSUSED*/ 2099 static int 2100 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2101 { 2102 int i; 2103 udp_t *udp = Q_TO_UDP(q); 2104 udp_stack_t *us = udp->udp_us; 2105 2106 for (i = 0; i < us->us_num_epriv_ports; i++) { 2107 if (us->us_epriv_ports[i] != 0) 2108 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 
2109 } 2110 return (0); 2111 } 2112 2113 /* ARGSUSED */ 2114 static int 2115 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2116 cred_t *cr) 2117 { 2118 long new_value; 2119 int i; 2120 udp_t *udp = Q_TO_UDP(q); 2121 udp_stack_t *us = udp->udp_us; 2122 2123 /* 2124 * Fail the request if the new value does not lie within the 2125 * port number limits. 2126 */ 2127 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2128 new_value <= 0 || new_value >= 65536) { 2129 return (EINVAL); 2130 } 2131 2132 /* Check if the value is already in the list */ 2133 for (i = 0; i < us->us_num_epriv_ports; i++) { 2134 if (new_value == us->us_epriv_ports[i]) { 2135 return (EEXIST); 2136 } 2137 } 2138 /* Find an empty slot */ 2139 for (i = 0; i < us->us_num_epriv_ports; i++) { 2140 if (us->us_epriv_ports[i] == 0) 2141 break; 2142 } 2143 if (i == us->us_num_epriv_ports) { 2144 return (EOVERFLOW); 2145 } 2146 2147 /* Set the new value */ 2148 us->us_epriv_ports[i] = (in_port_t)new_value; 2149 return (0); 2150 } 2151 2152 /* ARGSUSED */ 2153 static int 2154 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2155 cred_t *cr) 2156 { 2157 long new_value; 2158 int i; 2159 udp_t *udp = Q_TO_UDP(q); 2160 udp_stack_t *us = udp->udp_us; 2161 2162 /* 2163 * Fail the request if the new value does not lie within the 2164 * port number limits. 
2165 */ 2166 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2167 new_value <= 0 || new_value >= 65536) { 2168 return (EINVAL); 2169 } 2170 2171 /* Check that the value is already in the list */ 2172 for (i = 0; i < us->us_num_epriv_ports; i++) { 2173 if (us->us_epriv_ports[i] == new_value) 2174 break; 2175 } 2176 if (i == us->us_num_epriv_ports) { 2177 return (ESRCH); 2178 } 2179 2180 /* Clear the value */ 2181 us->us_epriv_ports[i] = 0; 2182 return (0); 2183 } 2184 2185 /* At minimum we need 4 bytes of UDP header */ 2186 #define ICMP_MIN_UDP_HDR 4 2187 2188 /* 2189 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2190 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2191 * Assumes that IP has pulled up everything up to and including the ICMP header. 2192 * An M_CTL could potentially come here from some other module (i.e. if UDP 2193 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2194 * does not have enough ICMP information , following STREAMS conventions, 2195 * we send it upstream assuming it is an M_CTL we don't understand. 2196 */ 2197 static void 2198 udp_icmp_error(queue_t *q, mblk_t *mp) 2199 { 2200 icmph_t *icmph; 2201 ipha_t *ipha; 2202 int iph_hdr_length; 2203 udpha_t *udpha; 2204 sin_t sin; 2205 sin6_t sin6; 2206 mblk_t *mp1; 2207 int error = 0; 2208 size_t mp_size = MBLKL(mp); 2209 udp_t *udp = Q_TO_UDP(q); 2210 2211 /* 2212 * Assume IP provides aligned packets - otherwise toss 2213 */ 2214 if (!OK_32PTR(mp->b_rptr)) { 2215 freemsg(mp); 2216 return; 2217 } 2218 2219 /* 2220 * Verify that we have a complete IP header and the application has 2221 * asked for errors. If not, send it upstream. 2222 */ 2223 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2224 noticmpv4: 2225 putnext(UDP_RD(q), mp); 2226 return; 2227 } 2228 2229 ipha = (ipha_t *)mp->b_rptr; 2230 /* 2231 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2232 * upstream. 
ICMPv6 is handled in udp_icmp_error_ipv6. 2233 */ 2234 switch (IPH_HDR_VERSION(ipha)) { 2235 case IPV6_VERSION: 2236 udp_icmp_error_ipv6(q, mp); 2237 return; 2238 case IPV4_VERSION: 2239 break; 2240 default: 2241 goto noticmpv4; 2242 } 2243 2244 /* Skip past the outer IP and ICMP headers */ 2245 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2246 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2247 /* 2248 * If we don't have the correct outer IP header length or if the ULP 2249 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2250 * send the packet upstream. 2251 */ 2252 if (iph_hdr_length < sizeof (ipha_t) || 2253 ipha->ipha_protocol != IPPROTO_ICMP || 2254 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2255 goto noticmpv4; 2256 } 2257 ipha = (ipha_t *)&icmph[1]; 2258 2259 /* Skip past the inner IP and find the ULP header */ 2260 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2261 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2262 /* 2263 * If we don't have the correct inner IP header length or if the ULP 2264 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2265 * bytes of UDP header, send it upstream. 2266 */ 2267 if (iph_hdr_length < sizeof (ipha_t) || 2268 ipha->ipha_protocol != IPPROTO_UDP || 2269 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2270 goto noticmpv4; 2271 } 2272 2273 switch (icmph->icmph_type) { 2274 case ICMP_DEST_UNREACHABLE: 2275 switch (icmph->icmph_code) { 2276 case ICMP_FRAGMENTATION_NEEDED: 2277 /* 2278 * IP has already adjusted the path MTU. 2279 * XXX Somehow pass MTU indication to application? 
2280 */ 2281 break; 2282 case ICMP_PORT_UNREACHABLE: 2283 case ICMP_PROTOCOL_UNREACHABLE: 2284 error = ECONNREFUSED; 2285 break; 2286 default: 2287 /* Transient errors */ 2288 break; 2289 } 2290 break; 2291 default: 2292 /* Transient errors */ 2293 break; 2294 } 2295 if (error == 0) { 2296 freemsg(mp); 2297 return; 2298 } 2299 2300 switch (udp->udp_family) { 2301 case AF_INET: 2302 sin = sin_null; 2303 sin.sin_family = AF_INET; 2304 sin.sin_addr.s_addr = ipha->ipha_dst; 2305 sin.sin_port = udpha->uha_dst_port; 2306 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2307 error); 2308 break; 2309 case AF_INET6: 2310 sin6 = sin6_null; 2311 sin6.sin6_family = AF_INET6; 2312 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2313 sin6.sin6_port = udpha->uha_dst_port; 2314 2315 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2316 NULL, 0, error); 2317 break; 2318 } 2319 if (mp1) 2320 putnext(UDP_RD(q), mp1); 2321 freemsg(mp); 2322 } 2323 2324 /* 2325 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2326 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2327 * Assumes that IP has pulled up all the extension headers as well as the 2328 * ICMPv6 header. 2329 * An M_CTL could potentially come here from some other module (i.e. if UDP 2330 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2331 * does not have enough ICMP information , following STREAMS conventions, 2332 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2333 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version 2334 * field (when cast to ipha_t in udp_icmp_error). 
2335 */ 2336 static void 2337 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2338 { 2339 icmp6_t *icmp6; 2340 ip6_t *ip6h, *outer_ip6h; 2341 uint16_t hdr_length; 2342 uint8_t *nexthdrp; 2343 udpha_t *udpha; 2344 sin6_t sin6; 2345 mblk_t *mp1; 2346 int error = 0; 2347 size_t mp_size = MBLKL(mp); 2348 udp_t *udp = Q_TO_UDP(q); 2349 2350 /* 2351 * Verify that we have a complete IP header. If not, send it upstream. 2352 */ 2353 if (mp_size < sizeof (ip6_t)) { 2354 noticmpv6: 2355 putnext(UDP_RD(q), mp); 2356 return; 2357 } 2358 2359 outer_ip6h = (ip6_t *)mp->b_rptr; 2360 /* 2361 * Verify this is an ICMPV6 packet, else send it upstream 2362 */ 2363 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2364 hdr_length = IPV6_HDR_LEN; 2365 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2366 &nexthdrp) || 2367 *nexthdrp != IPPROTO_ICMPV6) { 2368 goto noticmpv6; 2369 } 2370 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2371 ip6h = (ip6_t *)&icmp6[1]; 2372 /* 2373 * Verify we have a complete ICMP and inner IP header. 2374 */ 2375 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2376 goto noticmpv6; 2377 2378 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2379 goto noticmpv6; 2380 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2381 /* 2382 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2383 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2384 * packet upstream. 
2385 */ 2386 if ((*nexthdrp != IPPROTO_UDP) || 2387 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2388 goto noticmpv6; 2389 } 2390 2391 switch (icmp6->icmp6_type) { 2392 case ICMP6_DST_UNREACH: 2393 switch (icmp6->icmp6_code) { 2394 case ICMP6_DST_UNREACH_NOPORT: 2395 error = ECONNREFUSED; 2396 break; 2397 case ICMP6_DST_UNREACH_ADMIN: 2398 case ICMP6_DST_UNREACH_NOROUTE: 2399 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2400 case ICMP6_DST_UNREACH_ADDR: 2401 /* Transient errors */ 2402 break; 2403 default: 2404 break; 2405 } 2406 break; 2407 case ICMP6_PACKET_TOO_BIG: { 2408 struct T_unitdata_ind *tudi; 2409 struct T_opthdr *toh; 2410 size_t udi_size; 2411 mblk_t *newmp; 2412 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2413 sizeof (struct ip6_mtuinfo); 2414 sin6_t *sin6; 2415 struct ip6_mtuinfo *mtuinfo; 2416 2417 /* 2418 * If the application has requested to receive path mtu 2419 * information, send up an empty message containing an 2420 * IPV6_PATHMTU ancillary data item. 2421 */ 2422 if (!udp->udp_ipv6_recvpathmtu) 2423 break; 2424 2425 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2426 opt_length; 2427 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2428 BUMP_MIB(&udp->udp_mib, udpInErrors); 2429 break; 2430 } 2431 2432 /* 2433 * newmp->b_cont is left to NULL on purpose. This is an 2434 * empty message containing only ancillary data. 
2435 */ 2436 newmp->b_datap->db_type = M_PROTO; 2437 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2438 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2439 tudi->PRIM_type = T_UNITDATA_IND; 2440 tudi->SRC_length = sizeof (sin6_t); 2441 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2442 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2443 tudi->OPT_length = opt_length; 2444 2445 sin6 = (sin6_t *)&tudi[1]; 2446 bzero(sin6, sizeof (sin6_t)); 2447 sin6->sin6_family = AF_INET6; 2448 sin6->sin6_addr = udp->udp_v6dst; 2449 2450 toh = (struct T_opthdr *)&sin6[1]; 2451 toh->level = IPPROTO_IPV6; 2452 toh->name = IPV6_PATHMTU; 2453 toh->len = opt_length; 2454 toh->status = 0; 2455 2456 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2457 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2458 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2459 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2460 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2461 /* 2462 * We've consumed everything we need from the original 2463 * message. Free it, then send our empty message. 2464 */ 2465 freemsg(mp); 2466 putnext(UDP_RD(q), newmp); 2467 return; 2468 } 2469 case ICMP6_TIME_EXCEEDED: 2470 /* Transient errors */ 2471 break; 2472 case ICMP6_PARAM_PROB: 2473 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2474 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2475 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2476 (uchar_t *)nexthdrp) { 2477 error = ECONNREFUSED; 2478 break; 2479 } 2480 break; 2481 } 2482 if (error == 0) { 2483 freemsg(mp); 2484 return; 2485 } 2486 2487 sin6 = sin6_null; 2488 sin6.sin6_family = AF_INET6; 2489 sin6.sin6_addr = ip6h->ip6_dst; 2490 sin6.sin6_port = udpha->uha_dst_port; 2491 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2492 2493 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2494 error); 2495 if (mp1) 2496 putnext(UDP_RD(q), mp1); 2497 freemsg(mp); 2498 } 2499 2500 /* 2501 * This routine responds to T_ADDR_REQ messages. 
It is called by udp_wput. 2502 * The local address is filled in if endpoint is bound. The remote address 2503 * is filled in if remote address has been precified ("connected endpoint") 2504 * (The concept of connected CLTS sockets is alien to published TPI 2505 * but we support it anyway). 2506 */ 2507 static void 2508 udp_addr_req(queue_t *q, mblk_t *mp) 2509 { 2510 sin_t *sin; 2511 sin6_t *sin6; 2512 mblk_t *ackmp; 2513 struct T_addr_ack *taa; 2514 udp_t *udp = Q_TO_UDP(q); 2515 2516 /* Make it large enough for worst case */ 2517 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2518 2 * sizeof (sin6_t), 1); 2519 if (ackmp == NULL) { 2520 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2521 return; 2522 } 2523 taa = (struct T_addr_ack *)ackmp->b_rptr; 2524 2525 bzero(taa, sizeof (struct T_addr_ack)); 2526 ackmp->b_wptr = (uchar_t *)&taa[1]; 2527 2528 taa->PRIM_type = T_ADDR_ACK; 2529 ackmp->b_datap->db_type = M_PCPROTO; 2530 /* 2531 * Note: Following code assumes 32 bit alignment of basic 2532 * data structures like sin_t and struct T_addr_ack. 2533 */ 2534 if (udp->udp_state != TS_UNBND) { 2535 /* 2536 * Fill in local address first 2537 */ 2538 taa->LOCADDR_offset = sizeof (*taa); 2539 if (udp->udp_family == AF_INET) { 2540 taa->LOCADDR_length = sizeof (sin_t); 2541 sin = (sin_t *)&taa[1]; 2542 /* Fill zeroes and then initialize non-zero fields */ 2543 *sin = sin_null; 2544 sin->sin_family = AF_INET; 2545 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2546 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2547 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2548 sin->sin_addr.s_addr); 2549 } else { 2550 /* 2551 * INADDR_ANY 2552 * udp_v6src is not set, we might be bound to 2553 * broadcast/multicast. 
Use udp_bound_v6src as 2554 * local address instead (that could 2555 * also still be INADDR_ANY) 2556 */ 2557 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2558 sin->sin_addr.s_addr); 2559 } 2560 sin->sin_port = udp->udp_port; 2561 ackmp->b_wptr = (uchar_t *)&sin[1]; 2562 if (udp->udp_state == TS_DATA_XFER) { 2563 /* 2564 * connected, fill remote address too 2565 */ 2566 taa->REMADDR_length = sizeof (sin_t); 2567 /* assumed 32-bit alignment */ 2568 taa->REMADDR_offset = taa->LOCADDR_offset + 2569 taa->LOCADDR_length; 2570 2571 sin = (sin_t *)(ackmp->b_rptr + 2572 taa->REMADDR_offset); 2573 /* initialize */ 2574 *sin = sin_null; 2575 sin->sin_family = AF_INET; 2576 sin->sin_addr.s_addr = 2577 V4_PART_OF_V6(udp->udp_v6dst); 2578 sin->sin_port = udp->udp_dstport; 2579 ackmp->b_wptr = (uchar_t *)&sin[1]; 2580 } 2581 } else { 2582 taa->LOCADDR_length = sizeof (sin6_t); 2583 sin6 = (sin6_t *)&taa[1]; 2584 /* Fill zeroes and then initialize non-zero fields */ 2585 *sin6 = sin6_null; 2586 sin6->sin6_family = AF_INET6; 2587 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2588 sin6->sin6_addr = udp->udp_v6src; 2589 } else { 2590 /* 2591 * UNSPECIFIED 2592 * udp_v6src is not set, we might be bound to 2593 * broadcast/multicast. 
Use udp_bound_v6src as 2594 * local address instead (that could 2595 * also still be UNSPECIFIED) 2596 */ 2597 sin6->sin6_addr = 2598 udp->udp_bound_v6src; 2599 } 2600 sin6->sin6_port = udp->udp_port; 2601 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2602 if (udp->udp_state == TS_DATA_XFER) { 2603 /* 2604 * connected, fill remote address too 2605 */ 2606 taa->REMADDR_length = sizeof (sin6_t); 2607 /* assumed 32-bit alignment */ 2608 taa->REMADDR_offset = taa->LOCADDR_offset + 2609 taa->LOCADDR_length; 2610 2611 sin6 = (sin6_t *)(ackmp->b_rptr + 2612 taa->REMADDR_offset); 2613 /* initialize */ 2614 *sin6 = sin6_null; 2615 sin6->sin6_family = AF_INET6; 2616 sin6->sin6_addr = udp->udp_v6dst; 2617 sin6->sin6_port = udp->udp_dstport; 2618 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2619 } 2620 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2621 } 2622 } 2623 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2624 putnext(UDP_RD(q), ackmp); 2625 } 2626 2627 static void 2628 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2629 { 2630 if (udp->udp_family == AF_INET) { 2631 *tap = udp_g_t_info_ack_ipv4; 2632 } else { 2633 *tap = udp_g_t_info_ack_ipv6; 2634 } 2635 tap->CURRENT_state = udp->udp_state; 2636 tap->OPT_size = udp_max_optsize; 2637 } 2638 2639 /* 2640 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2641 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2642 * udp_g_t_info_ack. The current state of the stream is copied from 2643 * udp_state. 
2644 */ 2645 static void 2646 udp_capability_req(queue_t *q, mblk_t *mp) 2647 { 2648 t_uscalar_t cap_bits1; 2649 struct T_capability_ack *tcap; 2650 udp_t *udp = Q_TO_UDP(q); 2651 2652 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2653 2654 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2655 mp->b_datap->db_type, T_CAPABILITY_ACK); 2656 if (!mp) 2657 return; 2658 2659 tcap = (struct T_capability_ack *)mp->b_rptr; 2660 tcap->CAP_bits1 = 0; 2661 2662 if (cap_bits1 & TC1_INFO) { 2663 udp_copy_info(&tcap->INFO_ack, udp); 2664 tcap->CAP_bits1 |= TC1_INFO; 2665 } 2666 2667 putnext(UDP_RD(q), mp); 2668 } 2669 2670 /* 2671 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2672 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2673 * The current state of the stream is copied from udp_state. 2674 */ 2675 static void 2676 udp_info_req(queue_t *q, mblk_t *mp) 2677 { 2678 udp_t *udp = Q_TO_UDP(q); 2679 2680 /* Create a T_INFO_ACK message. */ 2681 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2682 T_INFO_ACK); 2683 if (!mp) 2684 return; 2685 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2686 putnext(UDP_RD(q), mp); 2687 } 2688 2689 /* 2690 * IP recognizes seven kinds of bind requests: 2691 * 2692 * - A zero-length address binds only to the protocol number. 2693 * 2694 * - A 4-byte address is treated as a request to 2695 * validate that the address is a valid local IPv4 2696 * address, appropriate for an application to bind to. 2697 * IP does the verification, but does not make any note 2698 * of the address at this time. 2699 * 2700 * - A 16-byte address contains is treated as a request 2701 * to validate a local IPv6 address, as the 4-byte 2702 * address case above. 2703 * 2704 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2705 * use it for the inbound fanout of packets. 
2706 * 2707 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2708 * use it for the inbound fanout of packets. 2709 * 2710 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2711 * information consisting of local and remote addresses 2712 * and ports. In this case, the addresses are both 2713 * validated as appropriate for this operation, and, if 2714 * so, the information is retained for use in the 2715 * inbound fanout. 2716 * 2717 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2718 * fanout information, like the 12-byte case above. 2719 * 2720 * IP will also fill in the IRE request mblk with information 2721 * regarding our peer. In all cases, we notify IP of our protocol 2722 * type by appending a single protocol byte to the bind request. 2723 */ 2724 static mblk_t * 2725 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2726 { 2727 char *cp; 2728 mblk_t *mp; 2729 struct T_bind_req *tbr; 2730 ipa_conn_t *ac; 2731 ipa6_conn_t *ac6; 2732 sin_t *sin; 2733 sin6_t *sin6; 2734 2735 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2736 2737 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2738 if (!mp) 2739 return (mp); 2740 mp->b_datap->db_type = M_PROTO; 2741 tbr = (struct T_bind_req *)mp->b_rptr; 2742 tbr->PRIM_type = bind_prim; 2743 tbr->ADDR_offset = sizeof (*tbr); 2744 tbr->CONIND_number = 0; 2745 tbr->ADDR_length = addr_length; 2746 cp = (char *)&tbr[1]; 2747 switch (addr_length) { 2748 case sizeof (ipa_conn_t): 2749 ASSERT(udp->udp_family == AF_INET); 2750 /* Append a request for an IRE */ 2751 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2752 if (!mp->b_cont) { 2753 freemsg(mp); 2754 return (NULL); 2755 } 2756 mp->b_cont->b_wptr += sizeof (ire_t); 2757 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2758 2759 /* cp known to be 32 bit aligned */ 2760 ac = (ipa_conn_t *)cp; 2761 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2762 ac->ac_faddr = 
V4_PART_OF_V6(udp->udp_v6dst); 2763 ac->ac_fport = udp->udp_dstport; 2764 ac->ac_lport = udp->udp_port; 2765 break; 2766 2767 case sizeof (ipa6_conn_t): 2768 ASSERT(udp->udp_family == AF_INET6); 2769 /* Append a request for an IRE */ 2770 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2771 if (!mp->b_cont) { 2772 freemsg(mp); 2773 return (NULL); 2774 } 2775 mp->b_cont->b_wptr += sizeof (ire_t); 2776 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2777 2778 /* cp known to be 32 bit aligned */ 2779 ac6 = (ipa6_conn_t *)cp; 2780 ac6->ac6_laddr = udp->udp_v6src; 2781 ac6->ac6_faddr = udp->udp_v6dst; 2782 ac6->ac6_fport = udp->udp_dstport; 2783 ac6->ac6_lport = udp->udp_port; 2784 break; 2785 2786 case sizeof (sin_t): 2787 ASSERT(udp->udp_family == AF_INET); 2788 /* Append a request for an IRE */ 2789 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2790 if (!mp->b_cont) { 2791 freemsg(mp); 2792 return (NULL); 2793 } 2794 mp->b_cont->b_wptr += sizeof (ire_t); 2795 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2796 2797 sin = (sin_t *)cp; 2798 *sin = sin_null; 2799 sin->sin_family = AF_INET; 2800 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2801 sin->sin_port = udp->udp_port; 2802 break; 2803 2804 case sizeof (sin6_t): 2805 ASSERT(udp->udp_family == AF_INET6); 2806 /* Append a request for an IRE */ 2807 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2808 if (!mp->b_cont) { 2809 freemsg(mp); 2810 return (NULL); 2811 } 2812 mp->b_cont->b_wptr += sizeof (ire_t); 2813 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2814 2815 sin6 = (sin6_t *)cp; 2816 *sin6 = sin6_null; 2817 sin6->sin6_family = AF_INET6; 2818 sin6->sin6_addr = udp->udp_bound_v6src; 2819 sin6->sin6_port = udp->udp_port; 2820 break; 2821 } 2822 /* Add protocol number to end */ 2823 cp[addr_length] = (char)IPPROTO_UDP; 2824 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2825 return (mp); 2826 } 2827 2828 /* 2829 * This is the open routine for udp. 
It allocates a udp_t structure for 2830 * the stream and, on the first open of the module, creates an ND table. 2831 */ 2832 /* ARGSUSED */ 2833 static int 2834 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2835 { 2836 int err; 2837 udp_t *udp; 2838 conn_t *connp; 2839 queue_t *ip_wq; 2840 zoneid_t zoneid; 2841 netstack_t *ns; 2842 udp_stack_t *us; 2843 2844 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2845 2846 /* If the stream is already open, return immediately. */ 2847 if (q->q_ptr != NULL) 2848 return (0); 2849 2850 /* If this is not a push of udp as a module, fail. */ 2851 if (sflag != MODOPEN) 2852 return (EINVAL); 2853 2854 ns = netstack_find_by_cred(credp); 2855 ASSERT(ns != NULL); 2856 us = ns->netstack_udp; 2857 ASSERT(us != NULL); 2858 2859 /* 2860 * For exclusive stacks we set the zoneid to zero 2861 * to make UDP operate as if in the global zone. 2862 */ 2863 if (us->us_netstack->netstack_stackid != GLOBAL_NETSTACKID) 2864 zoneid = GLOBAL_ZONEID; 2865 else 2866 zoneid = crgetzoneid(credp); 2867 2868 q->q_hiwat = us->us_recv_hiwat; 2869 WR(q)->q_hiwat = us->us_xmit_hiwat; 2870 WR(q)->q_lowat = us->us_xmit_lowat; 2871 2872 /* Insert ourselves in the stream since we're about to walk q_next */ 2873 qprocson(q); 2874 2875 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2876 bzero(udp, sizeof (*udp)); 2877 2878 /* 2879 * UDP is supported only as a module and it has to be pushed directly 2880 * above the device instance of IP. If UDP is pushed anywhere else 2881 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2882 * sake of MIB browsers and fail everything else. 
2883 */ 2884 ip_wq = WR(q)->q_next; 2885 if (NOT_OVER_IP(ip_wq)) { 2886 /* Support just SNMP for MIB browsers */ 2887 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP, 2888 us->us_netstack); 2889 connp->conn_rq = q; 2890 connp->conn_wq = WR(q); 2891 connp->conn_flags |= IPCL_UDPMOD; 2892 connp->conn_cred = credp; 2893 connp->conn_zoneid = zoneid; 2894 connp->conn_udp = udp; 2895 udp->udp_us = us; 2896 udp->udp_connp = connp; 2897 q->q_ptr = WR(q)->q_ptr = connp; 2898 crhold(credp); 2899 q->q_qinfo = &udp_snmp_rinit; 2900 WR(q)->q_qinfo = &udp_snmp_winit; 2901 return (0); 2902 } 2903 2904 /* 2905 * Initialize the udp_t structure for this stream. 2906 */ 2907 q = RD(ip_wq); 2908 connp = Q_TO_CONN(q); 2909 mutex_enter(&connp->conn_lock); 2910 connp->conn_proto = IPPROTO_UDP; 2911 connp->conn_flags |= IPCL_UDP; 2912 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2913 connp->conn_udp = udp; 2914 2915 /* Set the initial state of the stream and the privilege status. */ 2916 udp->udp_connp = connp; 2917 udp->udp_state = TS_UNBND; 2918 udp->udp_mode = UDP_MT_HOT; 2919 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2920 udp->udp_family = AF_INET6; 2921 udp->udp_ipversion = IPV6_VERSION; 2922 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2923 udp->udp_ttl = us->us_ipv6_hoplimit; 2924 connp->conn_af_isv6 = B_TRUE; 2925 connp->conn_flags |= IPCL_ISV6; 2926 } else { 2927 udp->udp_family = AF_INET; 2928 udp->udp_ipversion = IPV4_VERSION; 2929 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2930 udp->udp_ttl = us->us_ipv4_ttl; 2931 connp->conn_af_isv6 = B_FALSE; 2932 connp->conn_flags &= ~IPCL_ISV6; 2933 } 2934 2935 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2936 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2937 connp->conn_zoneid = zoneid; 2938 2939 udp->udp_open_time = lbolt64; 2940 udp->udp_open_pid = curproc->p_pid; 2941 2942 /* 2943 * If the caller has the process-wide flag set, then default to MAC 2944 * exempt mode. 
This allows read-down to unlabeled hosts. 2945 */ 2946 if (getpflags(NET_MAC_AWARE, credp) != 0) 2947 udp->udp_mac_exempt = B_TRUE; 2948 2949 if (connp->conn_flags & IPCL_SOCKET) { 2950 udp->udp_issocket = B_TRUE; 2951 udp->udp_direct_sockfs = B_TRUE; 2952 } 2953 2954 connp->conn_ulp_labeled = is_system_labeled(); 2955 2956 mutex_exit(&connp->conn_lock); 2957 udp->udp_us = us; 2958 2959 /* 2960 * The transmit hiwat/lowat is only looked at on IP's queue. 2961 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2962 * getsockopts. 2963 */ 2964 q->q_hiwat = us->us_recv_hiwat; 2965 WR(q)->q_hiwat = us->us_xmit_hiwat; 2966 WR(q)->q_lowat = us->us_xmit_lowat; 2967 2968 if (udp->udp_family == AF_INET6) { 2969 /* Build initial header template for transmit */ 2970 if ((err = udp_build_hdrs(q, udp)) != 0) { 2971 /* XXX missing free of connp? crfree? netstack_rele? */ 2972 qprocsoff(UDP_RD(q)); 2973 udp->udp_connp = NULL; 2974 connp->conn_udp = NULL; 2975 kmem_cache_free(udp_cache, udp); 2976 return (err); 2977 } 2978 } 2979 2980 /* Set the Stream head write offset and high watermark. */ 2981 (void) mi_set_sth_wroff(UDP_RD(q), 2982 udp->udp_max_hdr_len + us->us_wroff_extra); 2983 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 2984 2985 return (0); 2986 } 2987 2988 /* 2989 * Which UDP options OK to set through T_UNITDATA_REQ... 
2990 */ 2991 /* ARGSUSED */ 2992 static boolean_t 2993 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2994 { 2995 return (B_TRUE); 2996 } 2997 2998 /* 2999 * This routine gets default values of certain options whose default 3000 * values are maintained by protcol specific code 3001 */ 3002 /* ARGSUSED */ 3003 int 3004 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 3005 { 3006 udp_t *udp = Q_TO_UDP(q); 3007 udp_stack_t *us = udp->udp_us; 3008 int *i1 = (int *)ptr; 3009 3010 switch (level) { 3011 case IPPROTO_IP: 3012 switch (name) { 3013 case IP_MULTICAST_TTL: 3014 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 3015 return (sizeof (uchar_t)); 3016 case IP_MULTICAST_LOOP: 3017 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 3018 return (sizeof (uchar_t)); 3019 } 3020 break; 3021 case IPPROTO_IPV6: 3022 switch (name) { 3023 case IPV6_MULTICAST_HOPS: 3024 *i1 = IP_DEFAULT_MULTICAST_TTL; 3025 return (sizeof (int)); 3026 case IPV6_MULTICAST_LOOP: 3027 *i1 = IP_DEFAULT_MULTICAST_LOOP; 3028 return (sizeof (int)); 3029 case IPV6_UNICAST_HOPS: 3030 *i1 = us->us_ipv6_hoplimit; 3031 return (sizeof (int)); 3032 } 3033 break; 3034 } 3035 return (-1); 3036 } 3037 3038 /* 3039 * This routine retrieves the current status of socket options 3040 * and expects the caller to pass in the queue pointer of the 3041 * upper instance. It returns the size of the option retrieved. 
3042 */ 3043 int 3044 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 3045 { 3046 int *i1 = (int *)ptr; 3047 conn_t *connp; 3048 udp_t *udp; 3049 ip6_pkt_t *ipp; 3050 int len; 3051 udp_stack_t *us; 3052 3053 q = UDP_WR(q); 3054 connp = Q_TO_CONN(q); 3055 udp = connp->conn_udp; 3056 ipp = &udp->udp_sticky_ipp; 3057 us = udp->udp_us; 3058 3059 switch (level) { 3060 case SOL_SOCKET: 3061 switch (name) { 3062 case SO_DEBUG: 3063 *i1 = udp->udp_debug; 3064 break; /* goto sizeof (int) option return */ 3065 case SO_REUSEADDR: 3066 *i1 = udp->udp_reuseaddr; 3067 break; /* goto sizeof (int) option return */ 3068 case SO_TYPE: 3069 *i1 = SOCK_DGRAM; 3070 break; /* goto sizeof (int) option return */ 3071 3072 /* 3073 * The following three items are available here, 3074 * but are only meaningful to IP. 3075 */ 3076 case SO_DONTROUTE: 3077 *i1 = udp->udp_dontroute; 3078 break; /* goto sizeof (int) option return */ 3079 case SO_USELOOPBACK: 3080 *i1 = udp->udp_useloopback; 3081 break; /* goto sizeof (int) option return */ 3082 case SO_BROADCAST: 3083 *i1 = udp->udp_broadcast; 3084 break; /* goto sizeof (int) option return */ 3085 3086 case SO_SNDBUF: 3087 *i1 = q->q_hiwat; 3088 break; /* goto sizeof (int) option return */ 3089 case SO_RCVBUF: 3090 *i1 = RD(q)->q_hiwat; 3091 break; /* goto sizeof (int) option return */ 3092 case SO_DGRAM_ERRIND: 3093 *i1 = udp->udp_dgram_errind; 3094 break; /* goto sizeof (int) option return */ 3095 case SO_RECVUCRED: 3096 *i1 = udp->udp_recvucred; 3097 break; /* goto sizeof (int) option return */ 3098 case SO_TIMESTAMP: 3099 *i1 = udp->udp_timestamp; 3100 break; /* goto sizeof (int) option return */ 3101 case SO_ANON_MLP: 3102 *i1 = udp->udp_anon_mlp; 3103 break; /* goto sizeof (int) option return */ 3104 case SO_MAC_EXEMPT: 3105 *i1 = udp->udp_mac_exempt; 3106 break; /* goto sizeof (int) option return */ 3107 case SO_ALLZONES: 3108 *i1 = connp->conn_allzones; 3109 break; /* goto sizeof (int) option return */ 3110 case 
SO_EXCLBIND: 3111 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 3112 break; 3113 case SO_PROTOTYPE: 3114 *i1 = IPPROTO_UDP; 3115 break; 3116 case SO_DOMAIN: 3117 *i1 = udp->udp_family; 3118 break; 3119 default: 3120 return (-1); 3121 } 3122 break; 3123 case IPPROTO_IP: 3124 if (udp->udp_family != AF_INET) 3125 return (-1); 3126 switch (name) { 3127 case IP_OPTIONS: 3128 case T_IP_OPTIONS: 3129 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 3130 if (len > 0) { 3131 bcopy(udp->udp_ip_rcv_options + 3132 udp->udp_label_len, ptr, len); 3133 } 3134 return (len); 3135 case IP_TOS: 3136 case T_IP_TOS: 3137 *i1 = (int)udp->udp_type_of_service; 3138 break; /* goto sizeof (int) option return */ 3139 case IP_TTL: 3140 *i1 = (int)udp->udp_ttl; 3141 break; /* goto sizeof (int) option return */ 3142 case IP_NEXTHOP: 3143 case IP_RECVPKTINFO: 3144 /* 3145 * This also handles IP_PKTINFO. 3146 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 3147 * Differentiation is based on the size of the argument 3148 * passed in. 3149 * This option is handled in IP which will return an 3150 * error for IP_PKTINFO as it's not supported as a 3151 * sticky option. 
3152 */ 3153 return (-EINVAL); 3154 case IP_MULTICAST_IF: 3155 /* 0 address if not set */ 3156 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 3157 return (sizeof (ipaddr_t)); 3158 case IP_MULTICAST_TTL: 3159 *(uchar_t *)ptr = udp->udp_multicast_ttl; 3160 return (sizeof (uchar_t)); 3161 case IP_MULTICAST_LOOP: 3162 *ptr = connp->conn_multicast_loop; 3163 return (sizeof (uint8_t)); 3164 case IP_RECVOPTS: 3165 *i1 = udp->udp_recvopts; 3166 break; /* goto sizeof (int) option return */ 3167 case IP_RECVDSTADDR: 3168 *i1 = udp->udp_recvdstaddr; 3169 break; /* goto sizeof (int) option return */ 3170 case IP_RECVIF: 3171 *i1 = udp->udp_recvif; 3172 break; /* goto sizeof (int) option return */ 3173 case IP_RECVSLLA: 3174 *i1 = udp->udp_recvslla; 3175 break; /* goto sizeof (int) option return */ 3176 case IP_RECVTTL: 3177 *i1 = udp->udp_recvttl; 3178 break; /* goto sizeof (int) option return */ 3179 case IP_ADD_MEMBERSHIP: 3180 case IP_DROP_MEMBERSHIP: 3181 case IP_BLOCK_SOURCE: 3182 case IP_UNBLOCK_SOURCE: 3183 case IP_ADD_SOURCE_MEMBERSHIP: 3184 case IP_DROP_SOURCE_MEMBERSHIP: 3185 case MCAST_JOIN_GROUP: 3186 case MCAST_LEAVE_GROUP: 3187 case MCAST_BLOCK_SOURCE: 3188 case MCAST_UNBLOCK_SOURCE: 3189 case MCAST_JOIN_SOURCE_GROUP: 3190 case MCAST_LEAVE_SOURCE_GROUP: 3191 case IP_DONTFAILOVER_IF: 3192 /* cannot "get" the value for these */ 3193 return (-1); 3194 case IP_BOUND_IF: 3195 /* Zero if not set */ 3196 *i1 = udp->udp_bound_if; 3197 break; /* goto sizeof (int) option return */ 3198 case IP_UNSPEC_SRC: 3199 *i1 = udp->udp_unspec_source; 3200 break; /* goto sizeof (int) option return */ 3201 case IP_XMIT_IF: 3202 *i1 = udp->udp_xmit_if; 3203 break; /* goto sizeof (int) option return */ 3204 default: 3205 return (-1); 3206 } 3207 break; 3208 case IPPROTO_IPV6: 3209 if (udp->udp_family != AF_INET6) 3210 return (-1); 3211 switch (name) { 3212 case IPV6_UNICAST_HOPS: 3213 *i1 = (unsigned int)udp->udp_ttl; 3214 break; /* goto sizeof (int) option return */ 3215 case 
IPV6_MULTICAST_IF: 3216 /* 0 index if not set */ 3217 *i1 = udp->udp_multicast_if_index; 3218 break; /* goto sizeof (int) option return */ 3219 case IPV6_MULTICAST_HOPS: 3220 *i1 = udp->udp_multicast_ttl; 3221 break; /* goto sizeof (int) option return */ 3222 case IPV6_MULTICAST_LOOP: 3223 *i1 = connp->conn_multicast_loop; 3224 break; /* goto sizeof (int) option return */ 3225 case IPV6_JOIN_GROUP: 3226 case IPV6_LEAVE_GROUP: 3227 case MCAST_JOIN_GROUP: 3228 case MCAST_LEAVE_GROUP: 3229 case MCAST_BLOCK_SOURCE: 3230 case MCAST_UNBLOCK_SOURCE: 3231 case MCAST_JOIN_SOURCE_GROUP: 3232 case MCAST_LEAVE_SOURCE_GROUP: 3233 /* cannot "get" the value for these */ 3234 return (-1); 3235 case IPV6_BOUND_IF: 3236 /* Zero if not set */ 3237 *i1 = udp->udp_bound_if; 3238 break; /* goto sizeof (int) option return */ 3239 case IPV6_UNSPEC_SRC: 3240 *i1 = udp->udp_unspec_source; 3241 break; /* goto sizeof (int) option return */ 3242 case IPV6_RECVPKTINFO: 3243 *i1 = udp->udp_ip_recvpktinfo; 3244 break; /* goto sizeof (int) option return */ 3245 case IPV6_RECVTCLASS: 3246 *i1 = udp->udp_ipv6_recvtclass; 3247 break; /* goto sizeof (int) option return */ 3248 case IPV6_RECVPATHMTU: 3249 *i1 = udp->udp_ipv6_recvpathmtu; 3250 break; /* goto sizeof (int) option return */ 3251 case IPV6_RECVHOPLIMIT: 3252 *i1 = udp->udp_ipv6_recvhoplimit; 3253 break; /* goto sizeof (int) option return */ 3254 case IPV6_RECVHOPOPTS: 3255 *i1 = udp->udp_ipv6_recvhopopts; 3256 break; /* goto sizeof (int) option return */ 3257 case IPV6_RECVDSTOPTS: 3258 *i1 = udp->udp_ipv6_recvdstopts; 3259 break; /* goto sizeof (int) option return */ 3260 case _OLD_IPV6_RECVDSTOPTS: 3261 *i1 = udp->udp_old_ipv6_recvdstopts; 3262 break; /* goto sizeof (int) option return */ 3263 case IPV6_RECVRTHDRDSTOPTS: 3264 *i1 = udp->udp_ipv6_recvrthdrdstopts; 3265 break; /* goto sizeof (int) option return */ 3266 case IPV6_RECVRTHDR: 3267 *i1 = udp->udp_ipv6_recvrthdr; 3268 break; /* goto sizeof (int) option return */ 3269 case 
IPV6_PKTINFO: { 3270 /* XXX assumes that caller has room for max size! */ 3271 struct in6_pktinfo *pkti; 3272 3273 pkti = (struct in6_pktinfo *)ptr; 3274 if (ipp->ipp_fields & IPPF_IFINDEX) 3275 pkti->ipi6_ifindex = ipp->ipp_ifindex; 3276 else 3277 pkti->ipi6_ifindex = 0; 3278 if (ipp->ipp_fields & IPPF_ADDR) 3279 pkti->ipi6_addr = ipp->ipp_addr; 3280 else 3281 pkti->ipi6_addr = ipv6_all_zeros; 3282 return (sizeof (struct in6_pktinfo)); 3283 } 3284 case IPV6_TCLASS: 3285 if (ipp->ipp_fields & IPPF_TCLASS) 3286 *i1 = ipp->ipp_tclass; 3287 else 3288 *i1 = IPV6_FLOW_TCLASS( 3289 IPV6_DEFAULT_VERS_AND_FLOW); 3290 break; /* goto sizeof (int) option return */ 3291 case IPV6_NEXTHOP: { 3292 sin6_t *sin6 = (sin6_t *)ptr; 3293 3294 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 3295 return (0); 3296 *sin6 = sin6_null; 3297 sin6->sin6_family = AF_INET6; 3298 sin6->sin6_addr = ipp->ipp_nexthop; 3299 return (sizeof (sin6_t)); 3300 } 3301 case IPV6_HOPOPTS: 3302 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 3303 return (0); 3304 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 3305 return (0); 3306 /* 3307 * The cipso/label option is added by kernel. 3308 * User is not usually aware of this option. 3309 * We copy out the hbh opt after the label option. 
3310 */ 3311 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 3312 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 3313 if (udp->udp_label_len_v6 > 0) { 3314 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 3315 ptr[1] = (ipp->ipp_hopoptslen - 3316 udp->udp_label_len_v6 + 7) / 8 - 1; 3317 } 3318 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 3319 case IPV6_RTHDRDSTOPTS: 3320 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 3321 return (0); 3322 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 3323 return (ipp->ipp_rtdstoptslen); 3324 case IPV6_RTHDR: 3325 if (!(ipp->ipp_fields & IPPF_RTHDR)) 3326 return (0); 3327 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 3328 return (ipp->ipp_rthdrlen); 3329 case IPV6_DSTOPTS: 3330 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 3331 return (0); 3332 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 3333 return (ipp->ipp_dstoptslen); 3334 case IPV6_PATHMTU: 3335 return (ip_fill_mtuinfo(&udp->udp_v6dst, 3336 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 3337 us->us_netstack)); 3338 default: 3339 return (-1); 3340 } 3341 break; 3342 case IPPROTO_UDP: 3343 switch (name) { 3344 case UDP_ANONPRIVBIND: 3345 *i1 = udp->udp_anon_priv_bind; 3346 break; 3347 case UDP_EXCLBIND: 3348 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 3349 break; 3350 case UDP_RCVHDR: 3351 *i1 = udp->udp_rcvhdr ? 1 : 0; 3352 break; 3353 case UDP_NAT_T_ENDPOINT: 3354 *i1 = udp->udp_nat_t_endpoint; 3355 break; 3356 default: 3357 return (-1); 3358 } 3359 break; 3360 default: 3361 return (-1); 3362 } 3363 return (sizeof (int)); 3364 } 3365 3366 /* 3367 * This routine sets socket options; it expects the caller 3368 * to pass in the queue pointer of the upper instance. 
3369 */ 3370 /* ARGSUSED */ 3371 int 3372 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3373 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3374 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3375 { 3376 udpattrs_t *attrs = thisdg_attrs; 3377 int *i1 = (int *)invalp; 3378 boolean_t onoff = (*i1 == 0) ? 0 : 1; 3379 boolean_t checkonly; 3380 int error; 3381 conn_t *connp; 3382 udp_t *udp; 3383 uint_t newlen; 3384 udp_stack_t *us; 3385 3386 q = UDP_WR(q); 3387 connp = Q_TO_CONN(q); 3388 udp = connp->conn_udp; 3389 us = udp->udp_us; 3390 3391 switch (optset_context) { 3392 case SETFN_OPTCOM_CHECKONLY: 3393 checkonly = B_TRUE; 3394 /* 3395 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3396 * inlen != 0 implies value supplied and 3397 * we have to "pretend" to set it. 3398 * inlen == 0 implies that there is no 3399 * value part in T_CHECK request and just validation 3400 * done elsewhere should be enough, we just return here. 3401 */ 3402 if (inlen == 0) { 3403 *outlenp = 0; 3404 return (0); 3405 } 3406 break; 3407 case SETFN_OPTCOM_NEGOTIATE: 3408 checkonly = B_FALSE; 3409 break; 3410 case SETFN_UD_NEGOTIATE: 3411 case SETFN_CONN_NEGOTIATE: 3412 checkonly = B_FALSE; 3413 /* 3414 * Negotiating local and "association-related" options 3415 * through T_UNITDATA_REQ. 3416 * 3417 * Following routine can filter out ones we do not 3418 * want to be "set" this way. 3419 */ 3420 if (!udp_opt_allow_udr_set(level, name)) { 3421 *outlenp = 0; 3422 return (EINVAL); 3423 } 3424 break; 3425 default: 3426 /* 3427 * We should never get here 3428 */ 3429 *outlenp = 0; 3430 return (EINVAL); 3431 } 3432 3433 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3434 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3435 3436 /* 3437 * For fixed length options, no sanity check 3438 * of passed in length is done. It is assumed *_optcom_req() 3439 * routines do the right thing. 
3440 */ 3441 3442 switch (level) { 3443 case SOL_SOCKET: 3444 switch (name) { 3445 case SO_REUSEADDR: 3446 if (!checkonly) 3447 udp->udp_reuseaddr = onoff; 3448 break; 3449 case SO_DEBUG: 3450 if (!checkonly) 3451 udp->udp_debug = onoff; 3452 break; 3453 /* 3454 * The following three items are available here, 3455 * but are only meaningful to IP. 3456 */ 3457 case SO_DONTROUTE: 3458 if (!checkonly) 3459 udp->udp_dontroute = onoff; 3460 break; 3461 case SO_USELOOPBACK: 3462 if (!checkonly) 3463 udp->udp_useloopback = onoff; 3464 break; 3465 case SO_BROADCAST: 3466 if (!checkonly) 3467 udp->udp_broadcast = onoff; 3468 break; 3469 3470 case SO_SNDBUF: 3471 if (*i1 > us->us_max_buf) { 3472 *outlenp = 0; 3473 return (ENOBUFS); 3474 } 3475 if (!checkonly) { 3476 q->q_hiwat = *i1; 3477 WR(UDP_RD(q))->q_hiwat = *i1; 3478 } 3479 break; 3480 case SO_RCVBUF: 3481 if (*i1 > us->us_max_buf) { 3482 *outlenp = 0; 3483 return (ENOBUFS); 3484 } 3485 if (!checkonly) { 3486 RD(q)->q_hiwat = *i1; 3487 UDP_RD(q)->q_hiwat = *i1; 3488 (void) mi_set_sth_hiwat(UDP_RD(q), 3489 udp_set_rcv_hiwat(udp, *i1)); 3490 } 3491 break; 3492 case SO_DGRAM_ERRIND: 3493 if (!checkonly) 3494 udp->udp_dgram_errind = onoff; 3495 break; 3496 case SO_RECVUCRED: 3497 if (!checkonly) 3498 udp->udp_recvucred = onoff; 3499 break; 3500 case SO_ALLZONES: 3501 /* 3502 * "soft" error (negative) 3503 * option not handled at this level 3504 * Do not modify *outlenp. 
3505 */ 3506 return (-EINVAL); 3507 case SO_TIMESTAMP: 3508 if (!checkonly) 3509 udp->udp_timestamp = onoff; 3510 break; 3511 case SO_ANON_MLP: 3512 if (!checkonly) 3513 udp->udp_anon_mlp = onoff; 3514 break; 3515 case SO_MAC_EXEMPT: 3516 if (secpolicy_net_mac_aware(cr) != 0 || 3517 udp->udp_state != TS_UNBND) 3518 return (EACCES); 3519 if (!checkonly) 3520 udp->udp_mac_exempt = onoff; 3521 break; 3522 case SCM_UCRED: { 3523 struct ucred_s *ucr; 3524 cred_t *cr, *newcr; 3525 ts_label_t *tsl; 3526 3527 /* 3528 * Only sockets that have proper privileges and are 3529 * bound to MLPs will have any other value here, so 3530 * this implicitly tests for privilege to set label. 3531 */ 3532 if (connp->conn_mlp_type == mlptSingle) 3533 break; 3534 ucr = (struct ucred_s *)invalp; 3535 if (inlen != ucredsize || 3536 ucr->uc_labeloff < sizeof (*ucr) || 3537 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 3538 return (EINVAL); 3539 if (!checkonly) { 3540 mblk_t *mb; 3541 3542 if (attrs == NULL || 3543 (mb = attrs->udpattr_mb) == NULL) 3544 return (EINVAL); 3545 if ((cr = DB_CRED(mb)) == NULL) 3546 cr = udp->udp_connp->conn_cred; 3547 ASSERT(cr != NULL); 3548 if ((tsl = crgetlabel(cr)) == NULL) 3549 return (EINVAL); 3550 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 3551 tsl->tsl_doi, KM_NOSLEEP); 3552 if (newcr == NULL) 3553 return (ENOSR); 3554 mblk_setcred(mb, newcr); 3555 attrs->udpattr_credset = B_TRUE; 3556 crfree(newcr); 3557 } 3558 break; 3559 } 3560 case SO_EXCLBIND: 3561 if (!checkonly) 3562 udp->udp_exclbind = onoff; 3563 break; 3564 default: 3565 *outlenp = 0; 3566 return (EINVAL); 3567 } 3568 break; 3569 case IPPROTO_IP: 3570 if (udp->udp_family != AF_INET) { 3571 *outlenp = 0; 3572 return (ENOPROTOOPT); 3573 } 3574 switch (name) { 3575 case IP_OPTIONS: 3576 case T_IP_OPTIONS: 3577 /* Save options for use by IP. 
*/ 3578 newlen = inlen + udp->udp_label_len; 3579 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 3580 *outlenp = 0; 3581 return (EINVAL); 3582 } 3583 if (checkonly) 3584 break; 3585 3586 if (!tsol_option_set(&udp->udp_ip_snd_options, 3587 &udp->udp_ip_snd_options_len, 3588 udp->udp_label_len, invalp, inlen)) { 3589 *outlenp = 0; 3590 return (ENOMEM); 3591 } 3592 3593 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3594 UDPH_SIZE + udp->udp_ip_snd_options_len; 3595 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3596 us->us_wroff_extra); 3597 break; 3598 3599 case IP_TTL: 3600 if (!checkonly) { 3601 udp->udp_ttl = (uchar_t)*i1; 3602 } 3603 break; 3604 case IP_TOS: 3605 case T_IP_TOS: 3606 if (!checkonly) { 3607 udp->udp_type_of_service = (uchar_t)*i1; 3608 } 3609 break; 3610 case IP_MULTICAST_IF: { 3611 /* 3612 * TODO should check OPTMGMT reply and undo this if 3613 * there is an error. 3614 */ 3615 struct in_addr *inap = (struct in_addr *)invalp; 3616 if (!checkonly) { 3617 udp->udp_multicast_if_addr = 3618 inap->s_addr; 3619 } 3620 break; 3621 } 3622 case IP_MULTICAST_TTL: 3623 if (!checkonly) 3624 udp->udp_multicast_ttl = *invalp; 3625 break; 3626 case IP_MULTICAST_LOOP: 3627 if (!checkonly) 3628 connp->conn_multicast_loop = *invalp; 3629 break; 3630 case IP_RECVOPTS: 3631 if (!checkonly) 3632 udp->udp_recvopts = onoff; 3633 break; 3634 case IP_RECVDSTADDR: 3635 if (!checkonly) 3636 udp->udp_recvdstaddr = onoff; 3637 break; 3638 case IP_RECVIF: 3639 if (!checkonly) 3640 udp->udp_recvif = onoff; 3641 break; 3642 case IP_RECVSLLA: 3643 if (!checkonly) 3644 udp->udp_recvslla = onoff; 3645 break; 3646 case IP_RECVTTL: 3647 if (!checkonly) 3648 udp->udp_recvttl = onoff; 3649 break; 3650 case IP_PKTINFO: { 3651 /* 3652 * This also handles IP_RECVPKTINFO. 3653 * IP_PKTINFO and IP_RECVPKTINFO have same value. 3654 * Differentiation is based on the size of the 3655 * argument passed in. 
3656 */ 3657 struct in_pktinfo *pktinfop; 3658 ip4_pkt_t *attr_pktinfop; 3659 3660 if (checkonly) 3661 break; 3662 3663 if (inlen == sizeof (int)) { 3664 /* 3665 * This is IP_RECVPKTINFO option. 3666 * Keep a local copy of whether this option is 3667 * set or not and pass it down to IP for 3668 * processing. 3669 */ 3670 3671 udp->udp_ip_recvpktinfo = onoff; 3672 return (-EINVAL); 3673 } 3674 3675 if (attrs == NULL || 3676 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 3677 /* 3678 * sticky option or no buffer to return 3679 * the results. 3680 */ 3681 return (EINVAL); 3682 } 3683 3684 if (inlen != sizeof (struct in_pktinfo)) 3685 return (EINVAL); 3686 3687 pktinfop = (struct in_pktinfo *)invalp; 3688 3689 /* 3690 * At least one of the values should be specified 3691 */ 3692 if (pktinfop->ipi_ifindex == 0 && 3693 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 3694 return (EINVAL); 3695 } 3696 3697 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 3698 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 3699 3700 break; 3701 } 3702 case IP_ADD_MEMBERSHIP: 3703 case IP_DROP_MEMBERSHIP: 3704 case IP_BLOCK_SOURCE: 3705 case IP_UNBLOCK_SOURCE: 3706 case IP_ADD_SOURCE_MEMBERSHIP: 3707 case IP_DROP_SOURCE_MEMBERSHIP: 3708 case MCAST_JOIN_GROUP: 3709 case MCAST_LEAVE_GROUP: 3710 case MCAST_BLOCK_SOURCE: 3711 case MCAST_UNBLOCK_SOURCE: 3712 case MCAST_JOIN_SOURCE_GROUP: 3713 case MCAST_LEAVE_SOURCE_GROUP: 3714 case IP_SEC_OPT: 3715 case IP_NEXTHOP: 3716 /* 3717 * "soft" error (negative) 3718 * option not handled at this level 3719 * Do not modify *outlenp. 
3720 */ 3721 return (-EINVAL); 3722 case IP_BOUND_IF: 3723 if (!checkonly) 3724 udp->udp_bound_if = *i1; 3725 break; 3726 case IP_UNSPEC_SRC: 3727 if (!checkonly) 3728 udp->udp_unspec_source = onoff; 3729 break; 3730 case IP_XMIT_IF: 3731 if (!checkonly) 3732 udp->udp_xmit_if = *i1; 3733 break; 3734 default: 3735 *outlenp = 0; 3736 return (EINVAL); 3737 } 3738 break; 3739 case IPPROTO_IPV6: { 3740 ip6_pkt_t *ipp; 3741 boolean_t sticky; 3742 3743 if (udp->udp_family != AF_INET6) { 3744 *outlenp = 0; 3745 return (ENOPROTOOPT); 3746 } 3747 /* 3748 * Deal with both sticky options and ancillary data 3749 */ 3750 sticky = B_FALSE; 3751 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 3752 NULL) { 3753 /* sticky options, or none */ 3754 ipp = &udp->udp_sticky_ipp; 3755 sticky = B_TRUE; 3756 } 3757 3758 switch (name) { 3759 case IPV6_MULTICAST_IF: 3760 if (!checkonly) 3761 udp->udp_multicast_if_index = *i1; 3762 break; 3763 case IPV6_UNICAST_HOPS: 3764 /* -1 means use default */ 3765 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3766 *outlenp = 0; 3767 return (EINVAL); 3768 } 3769 if (!checkonly) { 3770 if (*i1 == -1) { 3771 udp->udp_ttl = ipp->ipp_unicast_hops = 3772 us->us_ipv6_hoplimit; 3773 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3774 /* Pass modified value to IP. */ 3775 *i1 = udp->udp_ttl; 3776 } else { 3777 udp->udp_ttl = ipp->ipp_unicast_hops = 3778 (uint8_t)*i1; 3779 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3780 } 3781 /* Rebuild the header template */ 3782 error = udp_build_hdrs(q, udp); 3783 if (error != 0) { 3784 *outlenp = 0; 3785 return (error); 3786 } 3787 } 3788 break; 3789 case IPV6_MULTICAST_HOPS: 3790 /* -1 means use default */ 3791 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3792 *outlenp = 0; 3793 return (EINVAL); 3794 } 3795 if (!checkonly) { 3796 if (*i1 == -1) { 3797 udp->udp_multicast_ttl = 3798 ipp->ipp_multicast_hops = 3799 IP_DEFAULT_MULTICAST_TTL; 3800 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3801 /* Pass modified value to IP. 
*/ 3802 *i1 = udp->udp_multicast_ttl; 3803 } else { 3804 udp->udp_multicast_ttl = 3805 ipp->ipp_multicast_hops = 3806 (uint8_t)*i1; 3807 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3808 } 3809 } 3810 break; 3811 case IPV6_MULTICAST_LOOP: 3812 if (*i1 != 0 && *i1 != 1) { 3813 *outlenp = 0; 3814 return (EINVAL); 3815 } 3816 if (!checkonly) 3817 connp->conn_multicast_loop = *i1; 3818 break; 3819 case IPV6_JOIN_GROUP: 3820 case IPV6_LEAVE_GROUP: 3821 case MCAST_JOIN_GROUP: 3822 case MCAST_LEAVE_GROUP: 3823 case MCAST_BLOCK_SOURCE: 3824 case MCAST_UNBLOCK_SOURCE: 3825 case MCAST_JOIN_SOURCE_GROUP: 3826 case MCAST_LEAVE_SOURCE_GROUP: 3827 /* 3828 * "soft" error (negative) 3829 * option not handled at this level 3830 * Note: Do not modify *outlenp 3831 */ 3832 return (-EINVAL); 3833 case IPV6_BOUND_IF: 3834 if (!checkonly) 3835 udp->udp_bound_if = *i1; 3836 break; 3837 case IPV6_UNSPEC_SRC: 3838 if (!checkonly) 3839 udp->udp_unspec_source = onoff; 3840 break; 3841 /* 3842 * Set boolean switches for ancillary data delivery 3843 */ 3844 case IPV6_RECVPKTINFO: 3845 if (!checkonly) 3846 udp->udp_ip_recvpktinfo = onoff; 3847 break; 3848 case IPV6_RECVTCLASS: 3849 if (!checkonly) { 3850 udp->udp_ipv6_recvtclass = onoff; 3851 } 3852 break; 3853 case IPV6_RECVPATHMTU: 3854 if (!checkonly) { 3855 udp->udp_ipv6_recvpathmtu = onoff; 3856 } 3857 break; 3858 case IPV6_RECVHOPLIMIT: 3859 if (!checkonly) 3860 udp->udp_ipv6_recvhoplimit = onoff; 3861 break; 3862 case IPV6_RECVHOPOPTS: 3863 if (!checkonly) 3864 udp->udp_ipv6_recvhopopts = onoff; 3865 break; 3866 case IPV6_RECVDSTOPTS: 3867 if (!checkonly) 3868 udp->udp_ipv6_recvdstopts = onoff; 3869 break; 3870 case _OLD_IPV6_RECVDSTOPTS: 3871 if (!checkonly) 3872 udp->udp_old_ipv6_recvdstopts = onoff; 3873 break; 3874 case IPV6_RECVRTHDRDSTOPTS: 3875 if (!checkonly) 3876 udp->udp_ipv6_recvrthdrdstopts = onoff; 3877 break; 3878 case IPV6_RECVRTHDR: 3879 if (!checkonly) 3880 udp->udp_ipv6_recvrthdr = onoff; 3881 break; 3882 /* 3883 * Set 
sticky options or ancillary data. 3884 * If sticky options, (re)build any extension headers 3885 * that might be needed as a result. 3886 */ 3887 case IPV6_PKTINFO: 3888 /* 3889 * The source address and ifindex are verified 3890 * in ip_opt_set(). For ancillary data the 3891 * source address is checked in ip_wput_v6. 3892 */ 3893 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3894 return (EINVAL); 3895 if (checkonly) 3896 break; 3897 3898 if (inlen == 0) { 3899 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3900 ipp->ipp_sticky_ignored |= 3901 (IPPF_IFINDEX|IPPF_ADDR); 3902 } else { 3903 struct in6_pktinfo *pkti; 3904 3905 pkti = (struct in6_pktinfo *)invalp; 3906 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3907 ipp->ipp_addr = pkti->ipi6_addr; 3908 if (ipp->ipp_ifindex != 0) 3909 ipp->ipp_fields |= IPPF_IFINDEX; 3910 else 3911 ipp->ipp_fields &= ~IPPF_IFINDEX; 3912 if (!IN6_IS_ADDR_UNSPECIFIED( 3913 &ipp->ipp_addr)) 3914 ipp->ipp_fields |= IPPF_ADDR; 3915 else 3916 ipp->ipp_fields &= ~IPPF_ADDR; 3917 } 3918 if (sticky) { 3919 error = udp_build_hdrs(q, udp); 3920 if (error != 0) 3921 return (error); 3922 } 3923 break; 3924 case IPV6_HOPLIMIT: 3925 if (sticky) 3926 return (EINVAL); 3927 if (inlen != 0 && inlen != sizeof (int)) 3928 return (EINVAL); 3929 if (checkonly) 3930 break; 3931 3932 if (inlen == 0) { 3933 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3934 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3935 } else { 3936 if (*i1 > 255 || *i1 < -1) 3937 return (EINVAL); 3938 if (*i1 == -1) 3939 ipp->ipp_hoplimit = 3940 us->us_ipv6_hoplimit; 3941 else 3942 ipp->ipp_hoplimit = *i1; 3943 ipp->ipp_fields |= IPPF_HOPLIMIT; 3944 } 3945 break; 3946 case IPV6_TCLASS: 3947 if (inlen != 0 && inlen != sizeof (int)) 3948 return (EINVAL); 3949 if (checkonly) 3950 break; 3951 3952 if (inlen == 0) { 3953 ipp->ipp_fields &= ~IPPF_TCLASS; 3954 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3955 } else { 3956 if (*i1 > 255 || *i1 < -1) 3957 return (EINVAL); 3958 if (*i1 == -1) 3959 ipp->ipp_tclass 
= 0; 3960 else 3961 ipp->ipp_tclass = *i1; 3962 ipp->ipp_fields |= IPPF_TCLASS; 3963 } 3964 if (sticky) { 3965 error = udp_build_hdrs(q, udp); 3966 if (error != 0) 3967 return (error); 3968 } 3969 break; 3970 case IPV6_NEXTHOP: 3971 /* 3972 * IP will verify that the nexthop is reachable 3973 * and fail for sticky options. 3974 */ 3975 if (inlen != 0 && inlen != sizeof (sin6_t)) 3976 return (EINVAL); 3977 if (checkonly) 3978 break; 3979 3980 if (inlen == 0) { 3981 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3982 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3983 } else { 3984 sin6_t *sin6 = (sin6_t *)invalp; 3985 3986 if (sin6->sin6_family != AF_INET6) 3987 return (EAFNOSUPPORT); 3988 if (IN6_IS_ADDR_V4MAPPED( 3989 &sin6->sin6_addr)) 3990 return (EADDRNOTAVAIL); 3991 ipp->ipp_nexthop = sin6->sin6_addr; 3992 if (!IN6_IS_ADDR_UNSPECIFIED( 3993 &ipp->ipp_nexthop)) 3994 ipp->ipp_fields |= IPPF_NEXTHOP; 3995 else 3996 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3997 } 3998 if (sticky) { 3999 error = udp_build_hdrs(q, udp); 4000 if (error != 0) 4001 return (error); 4002 } 4003 break; 4004 case IPV6_HOPOPTS: { 4005 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 4006 /* 4007 * Sanity checks - minimum size, size a multiple of 4008 * eight bytes, and matching size passed in. 4009 */ 4010 if (inlen != 0 && 4011 inlen != (8 * (hopts->ip6h_len + 1))) 4012 return (EINVAL); 4013 4014 if (checkonly) 4015 break; 4016 4017 error = optcom_pkt_set(invalp, inlen, sticky, 4018 (uchar_t **)&ipp->ipp_hopopts, 4019 &ipp->ipp_hopoptslen, 4020 sticky ? 
udp->udp_label_len_v6 : 0); 4021 if (error != 0) 4022 return (error); 4023 if (ipp->ipp_hopoptslen == 0) { 4024 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4025 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 4026 } else { 4027 ipp->ipp_fields |= IPPF_HOPOPTS; 4028 } 4029 if (sticky) { 4030 error = udp_build_hdrs(q, udp); 4031 if (error != 0) 4032 return (error); 4033 } 4034 break; 4035 } 4036 case IPV6_RTHDRDSTOPTS: { 4037 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 4038 4039 /* 4040 * Sanity checks - minimum size, size a multiple of 4041 * eight bytes, and matching size passed in. 4042 */ 4043 if (inlen != 0 && 4044 inlen != (8 * (dopts->ip6d_len + 1))) 4045 return (EINVAL); 4046 4047 if (checkonly) 4048 break; 4049 4050 if (inlen == 0) { 4051 if (sticky && 4052 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 4053 kmem_free(ipp->ipp_rtdstopts, 4054 ipp->ipp_rtdstoptslen); 4055 ipp->ipp_rtdstopts = NULL; 4056 ipp->ipp_rtdstoptslen = 0; 4057 } 4058 4059 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 4060 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 4061 } else { 4062 error = optcom_pkt_set(invalp, inlen, sticky, 4063 (uchar_t **)&ipp->ipp_rtdstopts, 4064 &ipp->ipp_rtdstoptslen, 0); 4065 if (error != 0) 4066 return (error); 4067 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4068 } 4069 if (sticky) { 4070 error = udp_build_hdrs(q, udp); 4071 if (error != 0) 4072 return (error); 4073 } 4074 break; 4075 } 4076 case IPV6_DSTOPTS: { 4077 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 4078 4079 /* 4080 * Sanity checks - minimum size, size a multiple of 4081 * eight bytes, and matching size passed in. 
4082 */ 4083 if (inlen != 0 && 4084 inlen != (8 * (dopts->ip6d_len + 1))) 4085 return (EINVAL); 4086 4087 if (checkonly) 4088 break; 4089 4090 if (inlen == 0) { 4091 if (sticky && 4092 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 4093 kmem_free(ipp->ipp_dstopts, 4094 ipp->ipp_dstoptslen); 4095 ipp->ipp_dstopts = NULL; 4096 ipp->ipp_dstoptslen = 0; 4097 } 4098 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4099 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 4100 } else { 4101 error = optcom_pkt_set(invalp, inlen, sticky, 4102 (uchar_t **)&ipp->ipp_dstopts, 4103 &ipp->ipp_dstoptslen, 0); 4104 if (error != 0) 4105 return (error); 4106 ipp->ipp_fields |= IPPF_DSTOPTS; 4107 } 4108 if (sticky) { 4109 error = udp_build_hdrs(q, udp); 4110 if (error != 0) 4111 return (error); 4112 } 4113 break; 4114 } 4115 case IPV6_RTHDR: { 4116 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 4117 4118 /* 4119 * Sanity checks - minimum size, size a multiple of 4120 * eight bytes, and matching size passed in. 4121 */ 4122 if (inlen != 0 && 4123 inlen != (8 * (rt->ip6r_len + 1))) 4124 return (EINVAL); 4125 4126 if (checkonly) 4127 break; 4128 4129 if (inlen == 0) { 4130 if (sticky && 4131 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 4132 kmem_free(ipp->ipp_rthdr, 4133 ipp->ipp_rthdrlen); 4134 ipp->ipp_rthdr = NULL; 4135 ipp->ipp_rthdrlen = 0; 4136 } 4137 ipp->ipp_fields &= ~IPPF_RTHDR; 4138 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 4139 } else { 4140 error = optcom_pkt_set(invalp, inlen, sticky, 4141 (uchar_t **)&ipp->ipp_rthdr, 4142 &ipp->ipp_rthdrlen, 0); 4143 if (error != 0) 4144 return (error); 4145 ipp->ipp_fields |= IPPF_RTHDR; 4146 } 4147 if (sticky) { 4148 error = udp_build_hdrs(q, udp); 4149 if (error != 0) 4150 return (error); 4151 } 4152 break; 4153 } 4154 4155 case IPV6_DONTFRAG: 4156 if (checkonly) 4157 break; 4158 4159 if (onoff) { 4160 ipp->ipp_fields |= IPPF_DONTFRAG; 4161 } else { 4162 ipp->ipp_fields &= ~IPPF_DONTFRAG; 4163 } 4164 break; 4165 4166 case IPV6_USE_MIN_MTU: 4167 if (inlen != sizeof (int)) 4168 
return (EINVAL); 4169 4170 if (*i1 < -1 || *i1 > 1) 4171 return (EINVAL); 4172 4173 if (checkonly) 4174 break; 4175 4176 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 4177 ipp->ipp_use_min_mtu = *i1; 4178 break; 4179 4180 case IPV6_BOUND_PIF: 4181 case IPV6_SEC_OPT: 4182 case IPV6_DONTFAILOVER_IF: 4183 case IPV6_SRC_PREFERENCES: 4184 case IPV6_V6ONLY: 4185 /* Handled at the IP level */ 4186 return (-EINVAL); 4187 default: 4188 *outlenp = 0; 4189 return (EINVAL); 4190 } 4191 break; 4192 } /* end IPPROTO_IPV6 */ 4193 case IPPROTO_UDP: 4194 switch (name) { 4195 case UDP_ANONPRIVBIND: 4196 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 4197 *outlenp = 0; 4198 return (error); 4199 } 4200 if (!checkonly) { 4201 udp->udp_anon_priv_bind = onoff; 4202 } 4203 break; 4204 case UDP_EXCLBIND: 4205 if (!checkonly) 4206 udp->udp_exclbind = onoff; 4207 break; 4208 case UDP_RCVHDR: 4209 if (!checkonly) 4210 udp->udp_rcvhdr = onoff; 4211 break; 4212 case UDP_NAT_T_ENDPOINT: 4213 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 4214 *outlenp = 0; 4215 return (error); 4216 } 4217 4218 /* 4219 * Use udp_family instead so we can avoid ambiguitites 4220 * with AF_INET6 sockets that may switch from IPv4 4221 * to IPv6. 4222 */ 4223 if (udp->udp_family != AF_INET) { 4224 *outlenp = 0; 4225 return (EAFNOSUPPORT); 4226 } 4227 4228 if (!checkonly) { 4229 udp->udp_nat_t_endpoint = onoff; 4230 4231 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 4232 UDPH_SIZE + udp->udp_ip_snd_options_len; 4233 4234 /* Also, adjust wroff */ 4235 if (onoff) { 4236 udp->udp_max_hdr_len += 4237 sizeof (uint32_t); 4238 } 4239 (void) mi_set_sth_wroff(RD(q), 4240 udp->udp_max_hdr_len + us->us_wroff_extra); 4241 } 4242 break; 4243 default: 4244 *outlenp = 0; 4245 return (EINVAL); 4246 } 4247 break; 4248 default: 4249 *outlenp = 0; 4250 return (EINVAL); 4251 } 4252 /* 4253 * Common case of OK return with outval same as inval. 
4254 */ 4255 if (invalp != outvalp) { 4256 /* don't trust bcopy for identical src/dst */ 4257 (void) bcopy(invalp, outvalp, inlen); 4258 } 4259 *outlenp = inlen; 4260 return (0); 4261 } 4262 4263 /* 4264 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 4265 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 4266 * headers, and the udp header. 4267 * Returns failure if can't allocate memory. 4268 */ 4269 static int 4270 udp_build_hdrs(queue_t *q, udp_t *udp) 4271 { 4272 udp_stack_t *us = udp->udp_us; 4273 uchar_t *hdrs; 4274 uint_t hdrs_len; 4275 ip6_t *ip6h; 4276 ip6i_t *ip6i; 4277 udpha_t *udpha; 4278 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 4279 4280 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 4281 ASSERT(hdrs_len != 0); 4282 if (hdrs_len != udp->udp_sticky_hdrs_len) { 4283 /* Need to reallocate */ 4284 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 4285 if (hdrs == NULL) 4286 return (ENOMEM); 4287 4288 if (udp->udp_sticky_hdrs_len != 0) { 4289 kmem_free(udp->udp_sticky_hdrs, 4290 udp->udp_sticky_hdrs_len); 4291 } 4292 udp->udp_sticky_hdrs = hdrs; 4293 udp->udp_sticky_hdrs_len = hdrs_len; 4294 } 4295 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 4296 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 4297 4298 /* Set header fields not in ipp */ 4299 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 4300 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 4301 ip6h = (ip6_t *)&ip6i[1]; 4302 } else { 4303 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 4304 } 4305 4306 if (!(ipp->ipp_fields & IPPF_ADDR)) 4307 ip6h->ip6_src = udp->udp_v6src; 4308 4309 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 4310 udpha->uha_src_port = udp->udp_port; 4311 4312 /* Try to get everything in a single mblk */ 4313 if (hdrs_len > udp->udp_max_hdr_len) { 4314 udp->udp_max_hdr_len = hdrs_len; 4315 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 4316 us->us_wroff_extra); 4317 } 4318 return (0); 4319 } 4320 4321 /* 4322 * This routine retrieves the value 
of an ND variable in a udpparam_t 4323 * structure. It is called through nd_getset when a user reads the 4324 * variable. 4325 */ 4326 /* ARGSUSED */ 4327 static int 4328 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4329 { 4330 udpparam_t *udppa = (udpparam_t *)cp; 4331 4332 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4333 return (0); 4334 } 4335 4336 /* 4337 * Walk through the param array specified registering each element with the 4338 * named dispatch (ND) handler. 4339 */ 4340 static boolean_t 4341 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 4342 { 4343 for (; cnt-- > 0; udppa++) { 4344 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4345 if (!nd_load(ndp, udppa->udp_param_name, 4346 udp_param_get, udp_param_set, 4347 (caddr_t)udppa)) { 4348 nd_free(ndp); 4349 return (B_FALSE); 4350 } 4351 } 4352 } 4353 if (!nd_load(ndp, "udp_extra_priv_ports", 4354 udp_extra_priv_ports_get, NULL, NULL)) { 4355 nd_free(ndp); 4356 return (B_FALSE); 4357 } 4358 if (!nd_load(ndp, "udp_extra_priv_ports_add", 4359 NULL, udp_extra_priv_ports_add, NULL)) { 4360 nd_free(ndp); 4361 return (B_FALSE); 4362 } 4363 if (!nd_load(ndp, "udp_extra_priv_ports_del", 4364 NULL, udp_extra_priv_ports_del, NULL)) { 4365 nd_free(ndp); 4366 return (B_FALSE); 4367 } 4368 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 4369 NULL)) { 4370 nd_free(ndp); 4371 return (B_FALSE); 4372 } 4373 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 4374 NULL)) { 4375 nd_free(ndp); 4376 return (B_FALSE); 4377 } 4378 return (B_TRUE); 4379 } 4380 4381 /* This routine sets an ND variable in a udpparam_t structure. */ 4382 /* ARGSUSED */ 4383 static int 4384 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 4385 { 4386 long new_value; 4387 udpparam_t *udppa = (udpparam_t *)cp; 4388 4389 /* 4390 * Fail the request if the new value does not lie within the 4391 * required bounds. 
4392 */ 4393 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 4394 new_value < udppa->udp_param_min || 4395 new_value > udppa->udp_param_max) { 4396 return (EINVAL); 4397 } 4398 4399 /* Set the new value */ 4400 udppa->udp_param_value = new_value; 4401 return (0); 4402 } 4403 4404 /* 4405 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 4406 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 4407 * just count the length needed for allocation. If 'dbuf' is non-NULL, 4408 * then it's assumed to be allocated to be large enough. 4409 * 4410 * Returns zero if trimming of the security option causes all options to go 4411 * away. 4412 */ 4413 static size_t 4414 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 4415 { 4416 struct T_opthdr *toh; 4417 size_t hol = ipp->ipp_hopoptslen; 4418 ip6_hbh_t *dstopt = NULL; 4419 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 4420 size_t tlen, olen, plen; 4421 boolean_t deleting; 4422 const struct ip6_opt *sopt, *lastpad; 4423 struct ip6_opt *dopt; 4424 4425 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 4426 toh->level = IPPROTO_IPV6; 4427 toh->name = IPV6_HOPOPTS; 4428 toh->status = 0; 4429 dstopt = (ip6_hbh_t *)(toh + 1); 4430 } 4431 4432 /* 4433 * If labeling is enabled, then skip the label option 4434 * but get other options if there are any. 4435 */ 4436 if (is_system_labeled()) { 4437 dopt = NULL; 4438 if (dstopt != NULL) { 4439 /* will fill in ip6h_len later */ 4440 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 4441 dopt = (struct ip6_opt *)(dstopt + 1); 4442 } 4443 sopt = (const struct ip6_opt *)(srcopt + 1); 4444 hol -= sizeof (*srcopt); 4445 tlen = sizeof (*dstopt); 4446 lastpad = NULL; 4447 deleting = B_FALSE; 4448 /* 4449 * This loop finds the first (lastpad pointer) of any number of 4450 * pads that preceeds the security option, then treats the 4451 * security option as though it were a pad, and then finds the 4452 * next non-pad option (or end of list). 
4453 * 4454 * It then treats the entire block as one big pad. To preserve 4455 * alignment of any options that follow, or just the end of the 4456 * list, it computes a minimal new padding size that keeps the 4457 * same alignment for the next option. 4458 * 4459 * If it encounters just a sequence of pads with no security 4460 * option, those are copied as-is rather than collapsed. 4461 * 4462 * Note that to handle the end of list case, the code makes one 4463 * loop with 'hol' set to zero. 4464 */ 4465 for (;;) { 4466 if (hol > 0) { 4467 if (sopt->ip6o_type == IP6OPT_PAD1) { 4468 if (lastpad == NULL) 4469 lastpad = sopt; 4470 sopt = (const struct ip6_opt *) 4471 &sopt->ip6o_len; 4472 hol--; 4473 continue; 4474 } 4475 olen = sopt->ip6o_len + sizeof (*sopt); 4476 if (olen > hol) 4477 olen = hol; 4478 if (sopt->ip6o_type == IP6OPT_PADN || 4479 sopt->ip6o_type == ip6opt_ls) { 4480 if (sopt->ip6o_type == ip6opt_ls) 4481 deleting = B_TRUE; 4482 if (lastpad == NULL) 4483 lastpad = sopt; 4484 sopt = (const struct ip6_opt *) 4485 ((const char *)sopt + olen); 4486 hol -= olen; 4487 continue; 4488 } 4489 } else { 4490 /* if nothing was copied at all, then delete */ 4491 if (tlen == sizeof (*dstopt)) 4492 return (0); 4493 /* last pass; pick up any trailing padding */ 4494 olen = 0; 4495 } 4496 if (deleting) { 4497 /* 4498 * compute aligning effect of deleted material 4499 * to reproduce with pad. 
4500 */ 4501 plen = ((const char *)sopt - 4502 (const char *)lastpad) & 7; 4503 tlen += plen; 4504 if (dopt != NULL) { 4505 if (plen == 1) { 4506 dopt->ip6o_type = IP6OPT_PAD1; 4507 } else if (plen > 1) { 4508 plen -= sizeof (*dopt); 4509 dopt->ip6o_type = IP6OPT_PADN; 4510 dopt->ip6o_len = plen; 4511 if (plen > 0) 4512 bzero(dopt + 1, plen); 4513 } 4514 dopt = (struct ip6_opt *) 4515 ((char *)dopt + plen); 4516 } 4517 deleting = B_FALSE; 4518 lastpad = NULL; 4519 } 4520 /* if there's uncopied padding, then copy that now */ 4521 if (lastpad != NULL) { 4522 olen += (const char *)sopt - 4523 (const char *)lastpad; 4524 sopt = lastpad; 4525 lastpad = NULL; 4526 } 4527 if (dopt != NULL && olen > 0) { 4528 bcopy(sopt, dopt, olen); 4529 dopt = (struct ip6_opt *)((char *)dopt + olen); 4530 } 4531 if (hol == 0) 4532 break; 4533 tlen += olen; 4534 sopt = (const struct ip6_opt *) 4535 ((const char *)sopt + olen); 4536 hol -= olen; 4537 } 4538 /* go back and patch up the length value, rounded upward */ 4539 if (dstopt != NULL) 4540 dstopt->ip6h_len = (tlen - 1) >> 3; 4541 } else { 4542 tlen = hol; 4543 if (dstopt != NULL) 4544 bcopy(srcopt, dstopt, hol); 4545 } 4546 4547 tlen += sizeof (*toh); 4548 if (toh != NULL) 4549 toh->len = tlen; 4550 4551 return (tlen); 4552 } 4553 4554 static void 4555 udp_input(conn_t *connp, mblk_t *mp) 4556 { 4557 struct T_unitdata_ind *tudi; 4558 uchar_t *rptr; /* Pointer to IP header */ 4559 int hdr_length; /* Length of IP+UDP headers */ 4560 int udi_size; /* Size of T_unitdata_ind */ 4561 int mp_len; 4562 udp_t *udp; 4563 udpha_t *udpha; 4564 int ipversion; 4565 ip6_pkt_t ipp; 4566 ip6_t *ip6h; 4567 ip6i_t *ip6i; 4568 mblk_t *mp1; 4569 mblk_t *options_mp = NULL; 4570 ip_pktinfo_t *pinfo = NULL; 4571 cred_t *cr = NULL; 4572 queue_t *q = connp->conn_rq; 4573 pid_t cpid; 4574 cred_t *rcr = connp->conn_cred; 4575 udp_stack_t *us; 4576 4577 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4578 "udp_rput_start: q %p mp %p", q, mp); 4579 4580 udp = 
connp->conn_udp; 4581 us = udp->udp_us; 4582 rptr = mp->b_rptr; 4583 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4584 ASSERT(OK_32PTR(rptr)); 4585 4586 /* 4587 * IP should have prepended the options data in an M_CTL 4588 * Check M_CTL "type" to make sure are not here bcos of 4589 * a valid ICMP message 4590 */ 4591 if (DB_TYPE(mp) == M_CTL) { 4592 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 4593 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 4594 IN_PKTINFO) { 4595 /* 4596 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 4597 * has been appended to the packet by IP. We need to 4598 * extract the mblk and adjust the rptr 4599 */ 4600 pinfo = (ip_pktinfo_t *)mp->b_rptr; 4601 options_mp = mp; 4602 mp = mp->b_cont; 4603 rptr = mp->b_rptr; 4604 UDP_STAT(us, udp_in_pktinfo); 4605 } else { 4606 /* 4607 * ICMP messages. 4608 */ 4609 udp_icmp_error(q, mp); 4610 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4611 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4612 return; 4613 } 4614 } 4615 4616 mp_len = msgdsize(mp); 4617 /* 4618 * This is the inbound data path. 4619 * First, we check to make sure the IP version number is correct, 4620 * and then pull the IP and UDP headers into the first mblk. 4621 * Assume IP provides aligned packets - otherwise toss. 4622 * Also, check if we have a complete IP header. 4623 */ 4624 4625 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4626 ipp.ipp_fields = 0; 4627 4628 ipversion = IPH_HDR_VERSION(rptr); 4629 switch (ipversion) { 4630 case IPV4_VERSION: 4631 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4632 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4633 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4634 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4635 (udp->udp_ip_rcv_options_len)) { 4636 /* 4637 * Handle IPv4 packets with options outside of the 4638 * main data path. Not needed for AF_INET6 sockets 4639 * since they don't support a getsockopt of IP_OPTIONS. 
4640 */ 4641 if (udp->udp_family == AF_INET6) 4642 break; 4643 /* 4644 * UDP length check performed for IPv4 packets with 4645 * options to check whether UDP length specified in 4646 * the header is the same as the physical length of 4647 * the packet. 4648 */ 4649 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4650 if (mp_len != (ntohs(udpha->uha_length) + 4651 hdr_length - UDPH_SIZE)) { 4652 goto tossit; 4653 } 4654 /* 4655 * Handle the case where the packet has IP options 4656 * and the IP_RECVSLLA & IP_RECVIF are set 4657 */ 4658 if (pinfo != NULL) 4659 mp = options_mp; 4660 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4661 SQTAG_UDP_INPUT); 4662 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4663 "udp_rput_end: q %p (%S)", q, "end"); 4664 return; 4665 } 4666 4667 /* Handle IPV6_RECVHOPLIMIT. */ 4668 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4669 udp->udp_ip_recvpktinfo) { 4670 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 4671 ipp.ipp_fields |= IPPF_IFINDEX; 4672 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 4673 } 4674 } 4675 break; 4676 case IPV6_VERSION: 4677 /* 4678 * IPv6 packets can only be received by applications 4679 * that are prepared to receive IPv6 addresses. 4680 * The IP fanout must ensure this. 
4681 */ 4682 ASSERT(udp->udp_family == AF_INET6); 4683 4684 ip6h = (ip6_t *)rptr; 4685 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4686 4687 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4688 uint8_t nexthdrp; 4689 /* Look for ifindex information */ 4690 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4691 ip6i = (ip6i_t *)ip6h; 4692 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4693 goto tossit; 4694 4695 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4696 ASSERT(ip6i->ip6i_ifindex != 0); 4697 ipp.ipp_fields |= IPPF_IFINDEX; 4698 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4699 } 4700 rptr = (uchar_t *)&ip6i[1]; 4701 mp->b_rptr = rptr; 4702 if (rptr == mp->b_wptr) { 4703 mp1 = mp->b_cont; 4704 freeb(mp); 4705 mp = mp1; 4706 rptr = mp->b_rptr; 4707 } 4708 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4709 goto tossit; 4710 ip6h = (ip6_t *)rptr; 4711 mp_len = msgdsize(mp); 4712 } 4713 /* 4714 * Find any potentially interesting extension headers 4715 * as well as the length of the IPv6 + extension 4716 * headers. 4717 */ 4718 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4719 UDPH_SIZE; 4720 ASSERT(nexthdrp == IPPROTO_UDP); 4721 } else { 4722 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4723 ip6i = NULL; 4724 } 4725 break; 4726 default: 4727 ASSERT(0); 4728 } 4729 4730 /* 4731 * IP inspected the UDP header thus all of it must be in the mblk. 4732 * UDP length check is performed for IPv6 packets and IPv4 packets 4733 * without options to check if the size of the packet as specified 4734 * by the header is the same as the physical size of the packet. 4735 */ 4736 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4737 if ((MBLKL(mp) < hdr_length) || 4738 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4739 goto tossit; 4740 } 4741 4742 /* Walk past the headers. */ 4743 if (!udp->udp_rcvhdr) { 4744 mp->b_rptr = rptr + hdr_length; 4745 mp_len -= hdr_length; 4746 } 4747 4748 /* 4749 * This is the inbound data path. 
Packets are passed upstream as 4750 * T_UNITDATA_IND messages with full IP headers still attached. 4751 */ 4752 if (udp->udp_family == AF_INET) { 4753 sin_t *sin; 4754 4755 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4756 4757 /* 4758 * Normally only send up the address. 4759 * If IP_RECVDSTADDR is set we include the destination IP 4760 * address as an option. With IP_RECVOPTS we include all 4761 * the IP options. Only ip_rput_other() handles packets 4762 * that contain IP options. 4763 */ 4764 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4765 if (udp->udp_recvdstaddr) { 4766 udi_size += sizeof (struct T_opthdr) + 4767 sizeof (struct in_addr); 4768 UDP_STAT(us, udp_in_recvdstaddr); 4769 } 4770 4771 if (udp->udp_ip_recvpktinfo && (pinfo != NULL) && 4772 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4773 udi_size += sizeof (struct T_opthdr) + 4774 sizeof (struct in_pktinfo); 4775 UDP_STAT(us, udp_ip_recvpktinfo); 4776 } 4777 4778 /* 4779 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4780 * space accordingly 4781 */ 4782 if (udp->udp_recvif && (pinfo != NULL) && 4783 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4784 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4785 UDP_STAT(us, udp_in_recvif); 4786 } 4787 4788 if (udp->udp_recvslla && (pinfo != NULL) && 4789 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4790 udi_size += sizeof (struct T_opthdr) + 4791 sizeof (struct sockaddr_dl); 4792 UDP_STAT(us, udp_in_recvslla); 4793 } 4794 4795 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4796 udi_size += sizeof (struct T_opthdr) + ucredsize; 4797 cpid = DB_CPID(mp); 4798 UDP_STAT(us, udp_in_recvucred); 4799 } 4800 4801 /* 4802 * If SO_TIMESTAMP is set allocate the appropriate sized 4803 * buffer. Since gethrestime() expects a pointer aligned 4804 * argument, we allocate space necessary for extra 4805 * alignment (even though it might not be used). 
4806 */ 4807 if (udp->udp_timestamp) { 4808 udi_size += sizeof (struct T_opthdr) + 4809 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4810 UDP_STAT(us, udp_in_timestamp); 4811 } 4812 4813 /* 4814 * If IP_RECVTTL is set allocate the appropriate sized buffer 4815 */ 4816 if (udp->udp_recvttl) { 4817 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4818 UDP_STAT(us, udp_in_recvttl); 4819 } 4820 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4821 4822 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4823 mp1 = allocb(udi_size, BPRI_MED); 4824 if (mp1 == NULL) { 4825 freemsg(mp); 4826 if (options_mp != NULL) 4827 freeb(options_mp); 4828 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4829 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4830 BUMP_MIB(&udp->udp_mib, udpInErrors); 4831 return; 4832 } 4833 mp1->b_cont = mp; 4834 mp = mp1; 4835 mp->b_datap->db_type = M_PROTO; 4836 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4837 mp->b_wptr = (uchar_t *)tudi + udi_size; 4838 tudi->PRIM_type = T_UNITDATA_IND; 4839 tudi->SRC_length = sizeof (sin_t); 4840 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4841 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4842 sizeof (sin_t); 4843 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4844 tudi->OPT_length = udi_size; 4845 sin = (sin_t *)&tudi[1]; 4846 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4847 sin->sin_port = udpha->uha_src_port; 4848 sin->sin_family = udp->udp_family; 4849 *(uint32_t *)&sin->sin_zero[0] = 0; 4850 *(uint32_t *)&sin->sin_zero[4] = 0; 4851 4852 /* 4853 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4854 * IP_RECVTTL has been set. 4855 */ 4856 if (udi_size != 0) { 4857 /* 4858 * Copy in destination address before options to avoid 4859 * any padding issues. 
4860 */ 4861 char *dstopt; 4862 4863 dstopt = (char *)&sin[1]; 4864 if (udp->udp_recvdstaddr) { 4865 struct T_opthdr *toh; 4866 ipaddr_t *dstptr; 4867 4868 toh = (struct T_opthdr *)dstopt; 4869 toh->level = IPPROTO_IP; 4870 toh->name = IP_RECVDSTADDR; 4871 toh->len = sizeof (struct T_opthdr) + 4872 sizeof (ipaddr_t); 4873 toh->status = 0; 4874 dstopt += sizeof (struct T_opthdr); 4875 dstptr = (ipaddr_t *)dstopt; 4876 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4877 dstopt = (char *)toh + toh->len; 4878 udi_size -= toh->len; 4879 } 4880 4881 if (udp->udp_ip_recvpktinfo && (pinfo != NULL) && 4882 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4883 struct T_opthdr *toh; 4884 struct in_pktinfo *pktinfop; 4885 4886 toh = (struct T_opthdr *)dstopt; 4887 toh->level = IPPROTO_IP; 4888 toh->name = IP_PKTINFO; 4889 toh->len = sizeof (struct T_opthdr) + 4890 sizeof (*pktinfop); 4891 toh->status = 0; 4892 dstopt += sizeof (struct T_opthdr); 4893 pktinfop = (struct in_pktinfo *)dstopt; 4894 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4895 pktinfop->ipi_spec_dst = 4896 pinfo->ip_pkt_match_addr; 4897 pktinfop->ipi_addr.s_addr = 4898 ((ipha_t *)rptr)->ipha_dst; 4899 4900 dstopt += sizeof (struct in_pktinfo); 4901 udi_size -= toh->len; 4902 } 4903 4904 if (udp->udp_recvslla && (pinfo != NULL) && 4905 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4906 4907 struct T_opthdr *toh; 4908 struct sockaddr_dl *dstptr; 4909 4910 toh = (struct T_opthdr *)dstopt; 4911 toh->level = IPPROTO_IP; 4912 toh->name = IP_RECVSLLA; 4913 toh->len = sizeof (struct T_opthdr) + 4914 sizeof (struct sockaddr_dl); 4915 toh->status = 0; 4916 dstopt += sizeof (struct T_opthdr); 4917 dstptr = (struct sockaddr_dl *)dstopt; 4918 bcopy(&pinfo->ip_pkt_slla, dstptr, 4919 sizeof (struct sockaddr_dl)); 4920 dstopt = (char *)toh + toh->len; 4921 udi_size -= toh->len; 4922 } 4923 4924 if (udp->udp_recvif && (pinfo != NULL) && 4925 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4926 4927 struct T_opthdr *toh; 4928 uint_t *dstptr; 4929 4930 toh = 
(struct T_opthdr *)dstopt; 4931 toh->level = IPPROTO_IP; 4932 toh->name = IP_RECVIF; 4933 toh->len = sizeof (struct T_opthdr) + 4934 sizeof (uint_t); 4935 toh->status = 0; 4936 dstopt += sizeof (struct T_opthdr); 4937 dstptr = (uint_t *)dstopt; 4938 *dstptr = pinfo->ip_pkt_ifindex; 4939 dstopt = (char *)toh + toh->len; 4940 udi_size -= toh->len; 4941 } 4942 4943 if (cr != NULL) { 4944 struct T_opthdr *toh; 4945 4946 toh = (struct T_opthdr *)dstopt; 4947 toh->level = SOL_SOCKET; 4948 toh->name = SCM_UCRED; 4949 toh->len = sizeof (struct T_opthdr) + ucredsize; 4950 toh->status = 0; 4951 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4952 dstopt = (char *)toh + toh->len; 4953 udi_size -= toh->len; 4954 } 4955 4956 if (udp->udp_timestamp) { 4957 struct T_opthdr *toh; 4958 4959 toh = (struct T_opthdr *)dstopt; 4960 toh->level = SOL_SOCKET; 4961 toh->name = SCM_TIMESTAMP; 4962 toh->len = sizeof (struct T_opthdr) + 4963 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4964 toh->status = 0; 4965 dstopt += sizeof (struct T_opthdr); 4966 /* Align for gethrestime() */ 4967 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4968 sizeof (intptr_t)); 4969 gethrestime((timestruc_t *)dstopt); 4970 dstopt = (char *)toh + toh->len; 4971 udi_size -= toh->len; 4972 } 4973 4974 /* 4975 * CAUTION: 4976 * Due to aligment issues 4977 * Processing of IP_RECVTTL option 4978 * should always be the last. Adding 4979 * any option processing after this will 4980 * cause alignment panic. 
4981 */ 4982 if (udp->udp_recvttl) { 4983 struct T_opthdr *toh; 4984 uint8_t *dstptr; 4985 4986 toh = (struct T_opthdr *)dstopt; 4987 toh->level = IPPROTO_IP; 4988 toh->name = IP_RECVTTL; 4989 toh->len = sizeof (struct T_opthdr) + 4990 sizeof (uint8_t); 4991 toh->status = 0; 4992 dstopt += sizeof (struct T_opthdr); 4993 dstptr = (uint8_t *)dstopt; 4994 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4995 dstopt = (char *)toh + toh->len; 4996 udi_size -= toh->len; 4997 } 4998 4999 /* Consumed all of allocated space */ 5000 ASSERT(udi_size == 0); 5001 } 5002 } else { 5003 sin6_t *sin6; 5004 5005 /* 5006 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 5007 * 5008 * Normally we only send up the address. If receiving of any 5009 * optional receive side information is enabled, we also send 5010 * that up as options. 5011 * [ Only udp_rput_other() handles packets that contain IP 5012 * options so code to account for does not appear immediately 5013 * below but elsewhere ] 5014 */ 5015 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 5016 5017 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 5018 IPPF_RTHDR|IPPF_IFINDEX)) { 5019 if (udp->udp_ipv6_recvhopopts && 5020 (ipp.ipp_fields & IPPF_HOPOPTS)) { 5021 size_t hlen; 5022 5023 UDP_STAT(us, udp_in_recvhopopts); 5024 hlen = copy_hop_opts(&ipp, NULL); 5025 if (hlen == 0) 5026 ipp.ipp_fields &= ~IPPF_HOPOPTS; 5027 udi_size += hlen; 5028 } 5029 if ((udp->udp_ipv6_recvdstopts || 5030 udp->udp_old_ipv6_recvdstopts) && 5031 (ipp.ipp_fields & IPPF_DSTOPTS)) { 5032 udi_size += sizeof (struct T_opthdr) + 5033 ipp.ipp_dstoptslen; 5034 UDP_STAT(us, udp_in_recvdstopts); 5035 } 5036 if (((udp->udp_ipv6_recvdstopts && 5037 udp->udp_ipv6_recvrthdr && 5038 (ipp.ipp_fields & IPPF_RTHDR)) || 5039 udp->udp_ipv6_recvrthdrdstopts) && 5040 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 5041 udi_size += sizeof (struct T_opthdr) + 5042 ipp.ipp_rtdstoptslen; 5043 UDP_STAT(us, udp_in_recvrtdstopts); 5044 } 5045 if 
(udp->udp_ipv6_recvrthdr && 5046 (ipp.ipp_fields & IPPF_RTHDR)) { 5047 udi_size += sizeof (struct T_opthdr) + 5048 ipp.ipp_rthdrlen; 5049 UDP_STAT(us, udp_in_recvrthdr); 5050 } 5051 if (udp->udp_ip_recvpktinfo && 5052 (ipp.ipp_fields & IPPF_IFINDEX)) { 5053 udi_size += sizeof (struct T_opthdr) + 5054 sizeof (struct in6_pktinfo); 5055 UDP_STAT(us, udp_in_recvpktinfo); 5056 } 5057 5058 } 5059 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5060 udi_size += sizeof (struct T_opthdr) + ucredsize; 5061 cpid = DB_CPID(mp); 5062 UDP_STAT(us, udp_in_recvucred); 5063 } 5064 5065 if (udp->udp_ipv6_recvhoplimit) { 5066 udi_size += sizeof (struct T_opthdr) + sizeof (int); 5067 UDP_STAT(us, udp_in_recvhoplimit); 5068 } 5069 5070 if (udp->udp_ipv6_recvtclass) { 5071 udi_size += sizeof (struct T_opthdr) + sizeof (int); 5072 UDP_STAT(us, udp_in_recvtclass); 5073 } 5074 5075 mp1 = allocb(udi_size, BPRI_MED); 5076 if (mp1 == NULL) { 5077 freemsg(mp); 5078 if (options_mp != NULL) 5079 freeb(options_mp); 5080 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5081 "udp_rput_end: q %p (%S)", q, "allocbfail"); 5082 BUMP_MIB(&udp->udp_mib, udpInErrors); 5083 return; 5084 } 5085 mp1->b_cont = mp; 5086 mp = mp1; 5087 mp->b_datap->db_type = M_PROTO; 5088 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5089 mp->b_wptr = (uchar_t *)tudi + udi_size; 5090 tudi->PRIM_type = T_UNITDATA_IND; 5091 tudi->SRC_length = sizeof (sin6_t); 5092 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5093 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 5094 sizeof (sin6_t); 5095 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 5096 tudi->OPT_length = udi_size; 5097 sin6 = (sin6_t *)&tudi[1]; 5098 if (ipversion == IPV4_VERSION) { 5099 in6_addr_t v6dst; 5100 5101 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 5102 &sin6->sin6_addr); 5103 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 5104 &v6dst); 5105 sin6->sin6_flowinfo = 0; 5106 sin6->sin6_scope_id = 0; 5107 sin6->__sin6_src_id = 
ip_srcid_find_addr(&v6dst, 5108 connp->conn_zoneid, us->us_netstack); 5109 } else { 5110 sin6->sin6_addr = ip6h->ip6_src; 5111 /* No sin6_flowinfo per API */ 5112 sin6->sin6_flowinfo = 0; 5113 /* For link-scope source pass up scope id */ 5114 if ((ipp.ipp_fields & IPPF_IFINDEX) && 5115 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 5116 sin6->sin6_scope_id = ipp.ipp_ifindex; 5117 else 5118 sin6->sin6_scope_id = 0; 5119 sin6->__sin6_src_id = ip_srcid_find_addr( 5120 &ip6h->ip6_dst, connp->conn_zoneid, 5121 us->us_netstack); 5122 } 5123 sin6->sin6_port = udpha->uha_src_port; 5124 sin6->sin6_family = udp->udp_family; 5125 5126 if (udi_size != 0) { 5127 uchar_t *dstopt; 5128 5129 dstopt = (uchar_t *)&sin6[1]; 5130 if (udp->udp_ip_recvpktinfo && 5131 (ipp.ipp_fields & IPPF_IFINDEX)) { 5132 struct T_opthdr *toh; 5133 struct in6_pktinfo *pkti; 5134 5135 toh = (struct T_opthdr *)dstopt; 5136 toh->level = IPPROTO_IPV6; 5137 toh->name = IPV6_PKTINFO; 5138 toh->len = sizeof (struct T_opthdr) + 5139 sizeof (*pkti); 5140 toh->status = 0; 5141 dstopt += sizeof (struct T_opthdr); 5142 pkti = (struct in6_pktinfo *)dstopt; 5143 if (ipversion == IPV6_VERSION) 5144 pkti->ipi6_addr = ip6h->ip6_dst; 5145 else 5146 IN6_IPADDR_TO_V4MAPPED( 5147 ((ipha_t *)rptr)->ipha_dst, 5148 &pkti->ipi6_addr); 5149 pkti->ipi6_ifindex = ipp.ipp_ifindex; 5150 dstopt += sizeof (*pkti); 5151 udi_size -= toh->len; 5152 } 5153 if (udp->udp_ipv6_recvhoplimit) { 5154 struct T_opthdr *toh; 5155 5156 toh = (struct T_opthdr *)dstopt; 5157 toh->level = IPPROTO_IPV6; 5158 toh->name = IPV6_HOPLIMIT; 5159 toh->len = sizeof (struct T_opthdr) + 5160 sizeof (uint_t); 5161 toh->status = 0; 5162 dstopt += sizeof (struct T_opthdr); 5163 if (ipversion == IPV6_VERSION) 5164 *(uint_t *)dstopt = ip6h->ip6_hops; 5165 else 5166 *(uint_t *)dstopt = 5167 ((ipha_t *)rptr)->ipha_ttl; 5168 dstopt += sizeof (uint_t); 5169 udi_size -= toh->len; 5170 } 5171 if (udp->udp_ipv6_recvtclass) { 5172 struct T_opthdr *toh; 5173 5174 toh = (struct 
T_opthdr *)dstopt; 5175 toh->level = IPPROTO_IPV6; 5176 toh->name = IPV6_TCLASS; 5177 toh->len = sizeof (struct T_opthdr) + 5178 sizeof (uint_t); 5179 toh->status = 0; 5180 dstopt += sizeof (struct T_opthdr); 5181 if (ipversion == IPV6_VERSION) { 5182 *(uint_t *)dstopt = 5183 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 5184 } else { 5185 ipha_t *ipha = (ipha_t *)rptr; 5186 *(uint_t *)dstopt = 5187 ipha->ipha_type_of_service; 5188 } 5189 dstopt += sizeof (uint_t); 5190 udi_size -= toh->len; 5191 } 5192 if (udp->udp_ipv6_recvhopopts && 5193 (ipp.ipp_fields & IPPF_HOPOPTS)) { 5194 size_t hlen; 5195 5196 hlen = copy_hop_opts(&ipp, dstopt); 5197 dstopt += hlen; 5198 udi_size -= hlen; 5199 } 5200 if (udp->udp_ipv6_recvdstopts && 5201 udp->udp_ipv6_recvrthdr && 5202 (ipp.ipp_fields & IPPF_RTHDR) && 5203 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 5204 struct T_opthdr *toh; 5205 5206 toh = (struct T_opthdr *)dstopt; 5207 toh->level = IPPROTO_IPV6; 5208 toh->name = IPV6_DSTOPTS; 5209 toh->len = sizeof (struct T_opthdr) + 5210 ipp.ipp_rtdstoptslen; 5211 toh->status = 0; 5212 dstopt += sizeof (struct T_opthdr); 5213 bcopy(ipp.ipp_rtdstopts, dstopt, 5214 ipp.ipp_rtdstoptslen); 5215 dstopt += ipp.ipp_rtdstoptslen; 5216 udi_size -= toh->len; 5217 } 5218 if (udp->udp_ipv6_recvrthdr && 5219 (ipp.ipp_fields & IPPF_RTHDR)) { 5220 struct T_opthdr *toh; 5221 5222 toh = (struct T_opthdr *)dstopt; 5223 toh->level = IPPROTO_IPV6; 5224 toh->name = IPV6_RTHDR; 5225 toh->len = sizeof (struct T_opthdr) + 5226 ipp.ipp_rthdrlen; 5227 toh->status = 0; 5228 dstopt += sizeof (struct T_opthdr); 5229 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 5230 dstopt += ipp.ipp_rthdrlen; 5231 udi_size -= toh->len; 5232 } 5233 if (udp->udp_ipv6_recvdstopts && 5234 (ipp.ipp_fields & IPPF_DSTOPTS)) { 5235 struct T_opthdr *toh; 5236 5237 toh = (struct T_opthdr *)dstopt; 5238 toh->level = IPPROTO_IPV6; 5239 toh->name = IPV6_DSTOPTS; 5240 toh->len = sizeof (struct T_opthdr) + 5241 ipp.ipp_dstoptslen; 5242 toh->status = 0; 5243 
dstopt += sizeof (struct T_opthdr); 5244 bcopy(ipp.ipp_dstopts, dstopt, 5245 ipp.ipp_dstoptslen); 5246 dstopt += ipp.ipp_dstoptslen; 5247 udi_size -= toh->len; 5248 } 5249 5250 if (cr != NULL) { 5251 struct T_opthdr *toh; 5252 5253 toh = (struct T_opthdr *)dstopt; 5254 toh->level = SOL_SOCKET; 5255 toh->name = SCM_UCRED; 5256 toh->len = sizeof (struct T_opthdr) + ucredsize; 5257 toh->status = 0; 5258 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5259 dstopt += toh->len; 5260 udi_size -= toh->len; 5261 } 5262 /* Consumed all of allocated space */ 5263 ASSERT(udi_size == 0); 5264 } 5265 #undef sin6 5266 /* No IP_RECVDSTADDR for IPv6. */ 5267 } 5268 5269 BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); 5270 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5271 "udp_rput_end: q %p (%S)", q, "end"); 5272 if (options_mp != NULL) 5273 freeb(options_mp); 5274 5275 if (udp->udp_direct_sockfs) { 5276 /* 5277 * There is nothing above us except for the stream head; 5278 * use the read-side synchronous stream interface in 5279 * order to reduce the time spent in interrupt thread. 5280 */ 5281 ASSERT(udp->udp_issocket); 5282 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 5283 } else { 5284 /* 5285 * Use regular STREAMS interface to pass data upstream 5286 * if this is not a socket endpoint, or if we have 5287 * switched over to the slow mode due to sockmod being 5288 * popped or a module being pushed on top of us. 
5289 */ 5290 putnext(UDP_RD(q), mp); 5291 } 5292 return; 5293 5294 tossit: 5295 freemsg(mp); 5296 if (options_mp != NULL) 5297 freeb(options_mp); 5298 BUMP_MIB(&udp->udp_mib, udpInErrors); 5299 } 5300 5301 void 5302 udp_conn_recv(conn_t *connp, mblk_t *mp) 5303 { 5304 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 5305 } 5306 5307 /* ARGSUSED */ 5308 static void 5309 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 5310 { 5311 udp_input((conn_t *)arg, mp); 5312 _UDP_EXIT((conn_t *)arg); 5313 } 5314 5315 /* 5316 * Process non-M_DATA messages as well as M_DATA messages that requires 5317 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 5318 */ 5319 static void 5320 udp_rput_other(queue_t *q, mblk_t *mp) 5321 { 5322 struct T_unitdata_ind *tudi; 5323 mblk_t *mp1; 5324 uchar_t *rptr; 5325 uchar_t *new_rptr; 5326 int hdr_length; 5327 int udi_size; /* Size of T_unitdata_ind */ 5328 int opt_len; /* Length of IP options */ 5329 sin_t *sin; 5330 struct T_error_ack *tea; 5331 mblk_t *options_mp = NULL; 5332 ip_pktinfo_t *pinfo; 5333 boolean_t recv_on = B_FALSE; 5334 cred_t *cr = NULL; 5335 udp_t *udp = Q_TO_UDP(q); 5336 pid_t cpid; 5337 cred_t *rcr = udp->udp_connp->conn_cred; 5338 udp_stack_t *us = udp->udp_us; 5339 5340 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 5341 "udp_rput_other: q %p mp %p", q, mp); 5342 5343 ASSERT(OK_32PTR(mp->b_rptr)); 5344 rptr = mp->b_rptr; 5345 5346 switch (mp->b_datap->db_type) { 5347 case M_CTL: 5348 /* 5349 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 5350 */ 5351 recv_on = B_TRUE; 5352 options_mp = mp; 5353 pinfo = (ip_pktinfo_t *)options_mp->b_rptr; 5354 5355 /* 5356 * The actual data is in mp->b_cont 5357 */ 5358 mp = mp->b_cont; 5359 ASSERT(OK_32PTR(mp->b_rptr)); 5360 rptr = mp->b_rptr; 5361 break; 5362 case M_DATA: 5363 /* 5364 * M_DATA messages contain IPv4 datagrams. They are handled 5365 * after this switch. 
5366 */ 5367 break; 5368 case M_PROTO: 5369 case M_PCPROTO: 5370 /* M_PROTO messages contain some type of TPI message. */ 5371 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 5372 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5373 freemsg(mp); 5374 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5375 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 5376 return; 5377 } 5378 tea = (struct T_error_ack *)rptr; 5379 5380 switch (tea->PRIM_type) { 5381 case T_ERROR_ACK: 5382 switch (tea->ERROR_prim) { 5383 case O_T_BIND_REQ: 5384 case T_BIND_REQ: { 5385 /* 5386 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5387 * clear out the associated port and source 5388 * address before passing the message 5389 * upstream. If this was caused by a T_CONN_REQ 5390 * revert back to bound state. 5391 */ 5392 udp_fanout_t *udpf; 5393 5394 udpf = &us->us_bind_fanout[UDP_BIND_HASH( 5395 udp->udp_port, us->us_bind_fanout_size)]; 5396 mutex_enter(&udpf->uf_lock); 5397 if (udp->udp_state == TS_DATA_XFER) { 5398 /* Connect failed */ 5399 tea->ERROR_prim = T_CONN_REQ; 5400 /* Revert back to the bound source */ 5401 udp->udp_v6src = udp->udp_bound_v6src; 5402 udp->udp_state = TS_IDLE; 5403 mutex_exit(&udpf->uf_lock); 5404 if (udp->udp_family == AF_INET6) 5405 (void) udp_build_hdrs(q, udp); 5406 break; 5407 } 5408 5409 if (udp->udp_discon_pending) { 5410 tea->ERROR_prim = T_DISCON_REQ; 5411 udp->udp_discon_pending = 0; 5412 } 5413 V6_SET_ZERO(udp->udp_v6src); 5414 V6_SET_ZERO(udp->udp_bound_v6src); 5415 udp->udp_state = TS_UNBND; 5416 udp_bind_hash_remove(udp, B_TRUE); 5417 udp->udp_port = 0; 5418 mutex_exit(&udpf->uf_lock); 5419 if (udp->udp_family == AF_INET6) 5420 (void) udp_build_hdrs(q, udp); 5421 break; 5422 } 5423 default: 5424 break; 5425 } 5426 break; 5427 case T_BIND_ACK: 5428 udp_rput_bind_ack(q, mp); 5429 return; 5430 5431 case T_OPTMGMT_ACK: 5432 case T_OK_ACK: 5433 break; 5434 default: 5435 freemsg(mp); 5436 return; 5437 } 5438 putnext(UDP_RD(q), mp); 5439 return; 5440 } 5441 5442 
/* 5443 * This is the inbound data path. 5444 * First, we make sure the data contains both IP and UDP headers. 5445 * 5446 * This handle IPv4 packets for only AF_INET sockets. 5447 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 5448 * is no need saving the options. 5449 */ 5450 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 5451 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 5452 if (mp->b_wptr - rptr < hdr_length) { 5453 if (!pullupmsg(mp, hdr_length)) { 5454 freemsg(mp); 5455 if (options_mp != NULL) 5456 freeb(options_mp); 5457 BUMP_MIB(&udp->udp_mib, udpInErrors); 5458 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5459 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 5460 return; 5461 } 5462 rptr = mp->b_rptr; 5463 } 5464 /* Walk past the headers. */ 5465 new_rptr = rptr + hdr_length; 5466 if (!udp->udp_rcvhdr) 5467 mp->b_rptr = new_rptr; 5468 5469 /* Save the options if any */ 5470 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 5471 if (opt_len > 0) { 5472 if (opt_len > udp->udp_ip_rcv_options_len) { 5473 if (udp->udp_ip_rcv_options_len) 5474 mi_free((char *)udp->udp_ip_rcv_options); 5475 udp->udp_ip_rcv_options_len = 0; 5476 udp->udp_ip_rcv_options = 5477 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 5478 if (udp->udp_ip_rcv_options) 5479 udp->udp_ip_rcv_options_len = opt_len; 5480 } 5481 if (udp->udp_ip_rcv_options_len) { 5482 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 5483 udp->udp_ip_rcv_options, opt_len); 5484 /* Adjust length if we are resusing the space */ 5485 udp->udp_ip_rcv_options_len = opt_len; 5486 } 5487 } else if (udp->udp_ip_rcv_options_len) { 5488 mi_free((char *)udp->udp_ip_rcv_options); 5489 udp->udp_ip_rcv_options = NULL; 5490 udp->udp_ip_rcv_options_len = 0; 5491 } 5492 5493 /* 5494 * Normally only send up the address. 5495 * If IP_RECVDSTADDR is set we include the destination IP 5496 * address as an option. With IP_RECVOPTS we include all 5497 * the IP options. 
5498 */ 5499 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5500 if (udp->udp_recvdstaddr) { 5501 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5502 UDP_STAT(us, udp_in_recvdstaddr); 5503 } 5504 5505 if (udp->udp_ip_recvpktinfo && recv_on && 5506 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 5507 udi_size += sizeof (struct T_opthdr) + 5508 sizeof (struct in_pktinfo); 5509 UDP_STAT(us, udp_ip_recvpktinfo); 5510 } 5511 5512 if (udp->udp_recvopts && opt_len > 0) { 5513 udi_size += sizeof (struct T_opthdr) + opt_len; 5514 UDP_STAT(us, udp_in_recvopts); 5515 } 5516 5517 /* 5518 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5519 * space accordingly 5520 */ 5521 if (udp->udp_recvif && recv_on && 5522 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 5523 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5524 UDP_STAT(us, udp_in_recvif); 5525 } 5526 5527 if (udp->udp_recvslla && recv_on && 5528 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 5529 udi_size += sizeof (struct T_opthdr) + 5530 sizeof (struct sockaddr_dl); 5531 UDP_STAT(us, udp_in_recvslla); 5532 } 5533 5534 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5535 udi_size += sizeof (struct T_opthdr) + ucredsize; 5536 cpid = DB_CPID(mp); 5537 UDP_STAT(us, udp_in_recvucred); 5538 } 5539 /* 5540 * If IP_RECVTTL is set allocate the appropriate sized buffer 5541 */ 5542 if (udp->udp_recvttl) { 5543 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5544 UDP_STAT(us, udp_in_recvttl); 5545 } 5546 5547 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 5548 mp1 = allocb(udi_size, BPRI_MED); 5549 if (mp1 == NULL) { 5550 freemsg(mp); 5551 if (options_mp != NULL) 5552 freeb(options_mp); 5553 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5554 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5555 BUMP_MIB(&udp->udp_mib, udpInErrors); 5556 return; 5557 } 5558 mp1->b_cont = mp; 5559 mp = mp1; 5560 mp->b_datap->db_type = M_PROTO; 5561 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5562 mp->b_wptr = (uchar_t *)tudi + udi_size; 5563 tudi->PRIM_type = T_UNITDATA_IND; 5564 tudi->SRC_length = sizeof (sin_t); 5565 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5566 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5567 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5568 tudi->OPT_length = udi_size; 5569 5570 sin = (sin_t *)&tudi[1]; 5571 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5572 sin->sin_port = ((in_port_t *) 5573 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5574 sin->sin_family = AF_INET; 5575 *(uint32_t *)&sin->sin_zero[0] = 0; 5576 *(uint32_t *)&sin->sin_zero[4] = 0; 5577 5578 /* 5579 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5580 * IP_RECVTTL has been set. 5581 */ 5582 if (udi_size != 0) { 5583 /* 5584 * Copy in destination address before options to avoid any 5585 * padding issues. 
5586 */ 5587 char *dstopt; 5588 5589 dstopt = (char *)&sin[1]; 5590 if (udp->udp_recvdstaddr) { 5591 struct T_opthdr *toh; 5592 ipaddr_t *dstptr; 5593 5594 toh = (struct T_opthdr *)dstopt; 5595 toh->level = IPPROTO_IP; 5596 toh->name = IP_RECVDSTADDR; 5597 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5598 toh->status = 0; 5599 dstopt += sizeof (struct T_opthdr); 5600 dstptr = (ipaddr_t *)dstopt; 5601 *dstptr = (((ipaddr_t *)rptr)[4]); 5602 dstopt += sizeof (ipaddr_t); 5603 udi_size -= toh->len; 5604 } 5605 if (udp->udp_recvopts && udi_size != 0) { 5606 struct T_opthdr *toh; 5607 5608 toh = (struct T_opthdr *)dstopt; 5609 toh->level = IPPROTO_IP; 5610 toh->name = IP_RECVOPTS; 5611 toh->len = sizeof (struct T_opthdr) + opt_len; 5612 toh->status = 0; 5613 dstopt += sizeof (struct T_opthdr); 5614 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5615 dstopt += opt_len; 5616 udi_size -= toh->len; 5617 } 5618 if (udp->udp_ip_recvpktinfo && recv_on && 5619 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 5620 5621 struct T_opthdr *toh; 5622 struct in_pktinfo *pktinfop; 5623 5624 toh = (struct T_opthdr *)dstopt; 5625 toh->level = IPPROTO_IP; 5626 toh->name = IP_PKTINFO; 5627 toh->len = sizeof (struct T_opthdr) + 5628 sizeof (*pktinfop); 5629 toh->status = 0; 5630 dstopt += sizeof (struct T_opthdr); 5631 pktinfop = (struct in_pktinfo *)dstopt; 5632 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 5633 pktinfop->ipi_spec_dst = pinfo->ip_pkt_match_addr; 5634 5635 pktinfop->ipi_addr.s_addr = ((ipha_t *)rptr)->ipha_dst; 5636 5637 dstopt += sizeof (struct in_pktinfo); 5638 udi_size -= toh->len; 5639 } 5640 5641 if (udp->udp_recvslla && recv_on && 5642 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 5643 5644 struct T_opthdr *toh; 5645 struct sockaddr_dl *dstptr; 5646 5647 toh = (struct T_opthdr *)dstopt; 5648 toh->level = IPPROTO_IP; 5649 toh->name = IP_RECVSLLA; 5650 toh->len = sizeof (struct T_opthdr) + 5651 sizeof (struct sockaddr_dl); 5652 toh->status = 0; 5653 dstopt += sizeof 
(struct T_opthdr); 5654 dstptr = (struct sockaddr_dl *)dstopt; 5655 bcopy(&pinfo->ip_pkt_slla, dstptr, 5656 sizeof (struct sockaddr_dl)); 5657 dstopt += sizeof (struct sockaddr_dl); 5658 udi_size -= toh->len; 5659 } 5660 5661 if (udp->udp_recvif && recv_on && 5662 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 5663 5664 struct T_opthdr *toh; 5665 uint_t *dstptr; 5666 5667 toh = (struct T_opthdr *)dstopt; 5668 toh->level = IPPROTO_IP; 5669 toh->name = IP_RECVIF; 5670 toh->len = sizeof (struct T_opthdr) + 5671 sizeof (uint_t); 5672 toh->status = 0; 5673 dstopt += sizeof (struct T_opthdr); 5674 dstptr = (uint_t *)dstopt; 5675 *dstptr = pinfo->ip_pkt_ifindex; 5676 dstopt += sizeof (uint_t); 5677 udi_size -= toh->len; 5678 } 5679 5680 if (cr != NULL) { 5681 struct T_opthdr *toh; 5682 5683 toh = (struct T_opthdr *)dstopt; 5684 toh->level = SOL_SOCKET; 5685 toh->name = SCM_UCRED; 5686 toh->len = sizeof (struct T_opthdr) + ucredsize; 5687 toh->status = 0; 5688 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5689 dstopt += toh->len; 5690 udi_size -= toh->len; 5691 } 5692 5693 if (udp->udp_recvttl) { 5694 struct T_opthdr *toh; 5695 uint8_t *dstptr; 5696 5697 toh = (struct T_opthdr *)dstopt; 5698 toh->level = IPPROTO_IP; 5699 toh->name = IP_RECVTTL; 5700 toh->len = sizeof (struct T_opthdr) + 5701 sizeof (uint8_t); 5702 toh->status = 0; 5703 dstopt += sizeof (struct T_opthdr); 5704 dstptr = (uint8_t *)dstopt; 5705 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5706 dstopt += sizeof (uint8_t); 5707 udi_size -= toh->len; 5708 } 5709 5710 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5711 } 5712 BUMP_MIB(&udp->udp_mib, udpHCInDatagrams); 5713 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5714 "udp_rput_other_end: q %p (%S)", q, "end"); 5715 if (options_mp != NULL) 5716 freeb(options_mp); 5717 5718 if (udp->udp_direct_sockfs) { 5719 /* 5720 * There is nothing above us except for the stream head; 5721 * use the read-side synchronous stream interface in 5722 * order to reduce the time spent in 
interrupt thread. 5723 */ 5724 ASSERT(udp->udp_issocket); 5725 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5726 } else { 5727 /* 5728 * Use regular STREAMS interface to pass data upstream 5729 * if this is not a socket endpoint, or if we have 5730 * switched over to the slow mode due to sockmod being 5731 * popped or a module being pushed on top of us. 5732 */ 5733 putnext(UDP_RD(q), mp); 5734 } 5735 } 5736 5737 /* ARGSUSED */ 5738 static void 5739 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5740 { 5741 conn_t *connp = arg; 5742 5743 udp_rput_other(connp->conn_rq, mp); 5744 udp_exit(connp); 5745 } 5746 5747 /* 5748 * Process a T_BIND_ACK 5749 */ 5750 static void 5751 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5752 { 5753 udp_t *udp = Q_TO_UDP(q); 5754 mblk_t *mp1; 5755 ire_t *ire; 5756 struct T_bind_ack *tba; 5757 uchar_t *addrp; 5758 ipa_conn_t *ac; 5759 ipa6_conn_t *ac6; 5760 5761 if (udp->udp_discon_pending) 5762 udp->udp_discon_pending = 0; 5763 5764 /* 5765 * If a broadcast/multicast address was bound set 5766 * the source address to 0. 5767 * This ensures no datagrams with broadcast address 5768 * as source address are emitted (which would violate 5769 * RFC1122 - Hosts requirements) 5770 * 5771 * Note that when connecting the returned IRE is 5772 * for the destination address and we only perform 5773 * the broadcast check for the source address (it 5774 * is OK to connect to a broadcast/multicast address.) 5775 */ 5776 mp1 = mp->b_cont; 5777 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5778 ire = (ire_t *)mp1->b_rptr; 5779 5780 /* 5781 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5782 * local address. 
5783 */ 5784 if (ire->ire_type == IRE_BROADCAST && 5785 udp->udp_state != TS_DATA_XFER) { 5786 /* This was just a local bind to a broadcast addr */ 5787 V6_SET_ZERO(udp->udp_v6src); 5788 if (udp->udp_family == AF_INET6) 5789 (void) udp_build_hdrs(q, udp); 5790 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5791 /* 5792 * Local address not yet set - pick it from the 5793 * T_bind_ack 5794 */ 5795 tba = (struct T_bind_ack *)mp->b_rptr; 5796 addrp = &mp->b_rptr[tba->ADDR_offset]; 5797 switch (udp->udp_family) { 5798 case AF_INET: 5799 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5800 ac = (ipa_conn_t *)addrp; 5801 } else { 5802 ASSERT(tba->ADDR_length == 5803 sizeof (ipa_conn_x_t)); 5804 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5805 } 5806 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5807 &udp->udp_v6src); 5808 break; 5809 case AF_INET6: 5810 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5811 ac6 = (ipa6_conn_t *)addrp; 5812 } else { 5813 ASSERT(tba->ADDR_length == 5814 sizeof (ipa6_conn_x_t)); 5815 ac6 = &((ipa6_conn_x_t *) 5816 addrp)->ac6x_conn; 5817 } 5818 udp->udp_v6src = ac6->ac6_laddr; 5819 (void) udp_build_hdrs(q, udp); 5820 break; 5821 } 5822 } 5823 mp1 = mp1->b_cont; 5824 } 5825 /* 5826 * Look for one or more appended ACK message added by 5827 * udp_connect or udp_disconnect. 5828 * If none found just send up the T_BIND_ACK. 5829 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5830 * udp_disconnect has appended a T_OK_ACK. 
5831 */ 5832 if (mp1 != NULL) { 5833 if (mp->b_cont == mp1) 5834 mp->b_cont = NULL; 5835 else { 5836 ASSERT(mp->b_cont->b_cont == mp1); 5837 mp->b_cont->b_cont = NULL; 5838 } 5839 freemsg(mp); 5840 mp = mp1; 5841 while (mp != NULL) { 5842 mp1 = mp->b_cont; 5843 mp->b_cont = NULL; 5844 putnext(UDP_RD(q), mp); 5845 mp = mp1; 5846 } 5847 return; 5848 } 5849 freemsg(mp->b_cont); 5850 mp->b_cont = NULL; 5851 putnext(UDP_RD(q), mp); 5852 } 5853 5854 /* 5855 * return SNMP stuff in buffer in mpdata 5856 */ 5857 int 5858 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5859 { 5860 mblk_t *mpdata; 5861 mblk_t *mp_conn_ctl; 5862 mblk_t *mp_attr_ctl; 5863 mblk_t *mp6_conn_ctl; 5864 mblk_t *mp6_attr_ctl; 5865 mblk_t *mp_conn_tail; 5866 mblk_t *mp_attr_tail; 5867 mblk_t *mp6_conn_tail; 5868 mblk_t *mp6_attr_tail; 5869 struct opthdr *optp; 5870 mib2_udpEntry_t ude; 5871 mib2_udp6Entry_t ude6; 5872 mib2_transportMLPEntry_t mlp; 5873 int state; 5874 zoneid_t zoneid; 5875 int i; 5876 connf_t *connfp; 5877 conn_t *connp = Q_TO_CONN(q); 5878 udp_t *udp = connp->conn_udp; 5879 int v4_conn_idx; 5880 int v6_conn_idx; 5881 boolean_t needattr; 5882 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5883 5884 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 5885 if (mpctl == NULL || 5886 (mpdata = mpctl->b_cont) == NULL || 5887 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5888 (mp_attr_ctl = copymsg(mpctl)) == NULL || 5889 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 5890 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 5891 freemsg(mp_conn_ctl); 5892 freemsg(mp_attr_ctl); 5893 freemsg(mp6_conn_ctl); 5894 return (0); 5895 } 5896 5897 zoneid = connp->conn_zoneid; 5898 5899 /* fixed length structure for IPv4 and IPv6 counters */ 5900 SET_MIB(udp->udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5901 SET_MIB(udp->udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5902 /* synchronize 64- and 32-bit counters */ 5903 SYNC32_MIB(&udp->udp_mib, udpInDatagrams, udpHCInDatagrams); 5904 SYNC32_MIB(&udp->udp_mib, 
udpOutDatagrams, udpHCOutDatagrams); 5905 5906 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5907 optp->level = MIB2_UDP; 5908 optp->name = 0; 5909 (void) snmp_append_data(mpdata, (char *)&udp->udp_mib, 5910 sizeof (udp->udp_mib)); 5911 optp->len = msgdsize(mpdata); 5912 qreply(q, mpctl); 5913 5914 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 5915 v4_conn_idx = v6_conn_idx = 0; 5916 5917 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5918 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5919 connp = NULL; 5920 5921 while ((connp = ipcl_get_next_conn(connfp, connp, 5922 IPCL_UDP))) { 5923 udp = connp->conn_udp; 5924 if (zoneid != connp->conn_zoneid) 5925 continue; 5926 5927 /* 5928 * Note that the port numbers are sent in 5929 * host byte order 5930 */ 5931 5932 if (udp->udp_state == TS_UNBND) 5933 state = MIB2_UDP_unbound; 5934 else if (udp->udp_state == TS_IDLE) 5935 state = MIB2_UDP_idle; 5936 else if (udp->udp_state == TS_DATA_XFER) 5937 state = MIB2_UDP_connected; 5938 else 5939 state = MIB2_UDP_unknown; 5940 5941 needattr = B_FALSE; 5942 bzero(&mlp, sizeof (mlp)); 5943 if (connp->conn_mlp_type != mlptSingle) { 5944 if (connp->conn_mlp_type == mlptShared || 5945 connp->conn_mlp_type == mlptBoth) 5946 mlp.tme_flags |= MIB2_TMEF_SHARED; 5947 if (connp->conn_mlp_type == mlptPrivate || 5948 connp->conn_mlp_type == mlptBoth) 5949 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 5950 needattr = B_TRUE; 5951 } 5952 5953 /* 5954 * Create an IPv4 table entry for IPv4 entries and also 5955 * any IPv6 entries which are bound to in6addr_any 5956 * (i.e. anything a IPv4 peer could connect/send to). 
5957 */ 5958 if (udp->udp_ipversion == IPV4_VERSION || 5959 (udp->udp_state <= TS_IDLE && 5960 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5961 ude.udpEntryInfo.ue_state = state; 5962 /* 5963 * If in6addr_any this will set it to 5964 * INADDR_ANY 5965 */ 5966 ude.udpLocalAddress = 5967 V4_PART_OF_V6(udp->udp_v6src); 5968 ude.udpLocalPort = ntohs(udp->udp_port); 5969 if (udp->udp_state == TS_DATA_XFER) { 5970 /* 5971 * Can potentially get here for 5972 * v6 socket if another process 5973 * (say, ping) has just done a 5974 * sendto(), changing the state 5975 * from the TS_IDLE above to 5976 * TS_DATA_XFER by the time we hit 5977 * this part of the code. 5978 */ 5979 ude.udpEntryInfo.ue_RemoteAddress = 5980 V4_PART_OF_V6(udp->udp_v6dst); 5981 ude.udpEntryInfo.ue_RemotePort = 5982 ntohs(udp->udp_dstport); 5983 } else { 5984 ude.udpEntryInfo.ue_RemoteAddress = 0; 5985 ude.udpEntryInfo.ue_RemotePort = 0; 5986 } 5987 5988 /* 5989 * We make the assumption that all udp_t 5990 * structs will be created within an address 5991 * region no larger than 32-bits. 5992 */ 5993 ude.udpInstance = (uint32_t)(uintptr_t)udp; 5994 ude.udpCreationProcess = 5995 (udp->udp_open_pid < 0) ? 
5996 MIB2_UNKNOWN_PROCESS : 5997 udp->udp_open_pid; 5998 ude.udpCreationTime = udp->udp_open_time; 5999 6000 (void) snmp_append_data2(mp_conn_ctl->b_cont, 6001 &mp_conn_tail, (char *)&ude, sizeof (ude)); 6002 mlp.tme_connidx = v4_conn_idx++; 6003 if (needattr) 6004 (void) snmp_append_data2( 6005 mp_attr_ctl->b_cont, &mp_attr_tail, 6006 (char *)&mlp, sizeof (mlp)); 6007 } 6008 if (udp->udp_ipversion == IPV6_VERSION) { 6009 ude6.udp6EntryInfo.ue_state = state; 6010 ude6.udp6LocalAddress = udp->udp_v6src; 6011 ude6.udp6LocalPort = ntohs(udp->udp_port); 6012 ude6.udp6IfIndex = udp->udp_bound_if; 6013 if (udp->udp_state == TS_DATA_XFER) { 6014 ude6.udp6EntryInfo.ue_RemoteAddress = 6015 udp->udp_v6dst; 6016 ude6.udp6EntryInfo.ue_RemotePort = 6017 ntohs(udp->udp_dstport); 6018 } else { 6019 ude6.udp6EntryInfo.ue_RemoteAddress = 6020 sin6_null.sin6_addr; 6021 ude6.udp6EntryInfo.ue_RemotePort = 0; 6022 } 6023 /* 6024 * We make the assumption that all udp_t 6025 * structs will be created within an address 6026 * region no larger than 32-bits. 6027 */ 6028 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 6029 ude6.udp6CreationProcess = 6030 (udp->udp_open_pid < 0) ? 6031 MIB2_UNKNOWN_PROCESS : 6032 udp->udp_open_pid; 6033 ude6.udp6CreationTime = udp->udp_open_time; 6034 6035 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 6036 &mp6_conn_tail, (char *)&ude6, 6037 sizeof (ude6)); 6038 mlp.tme_connidx = v6_conn_idx++; 6039 if (needattr) 6040 (void) snmp_append_data2( 6041 mp6_attr_ctl->b_cont, 6042 &mp6_attr_tail, (char *)&mlp, 6043 sizeof (mlp)); 6044 } 6045 } 6046 } 6047 6048 /* IPv4 UDP endpoints */ 6049 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 6050 sizeof (struct T_optmgmt_ack)]; 6051 optp->level = MIB2_UDP; 6052 optp->name = MIB2_UDP_ENTRY; 6053 optp->len = msgdsize(mp_conn_ctl->b_cont); 6054 qreply(q, mp_conn_ctl); 6055 6056 /* table of MLP attributes... 
*/ 6057 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 6058 sizeof (struct T_optmgmt_ack)]; 6059 optp->level = MIB2_UDP; 6060 optp->name = EXPER_XPORT_MLP; 6061 optp->len = msgdsize(mp_attr_ctl->b_cont); 6062 if (optp->len == 0) 6063 freemsg(mp_attr_ctl); 6064 else 6065 qreply(q, mp_attr_ctl); 6066 6067 /* IPv6 UDP endpoints */ 6068 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 6069 sizeof (struct T_optmgmt_ack)]; 6070 optp->level = MIB2_UDP6; 6071 optp->name = MIB2_UDP6_ENTRY; 6072 optp->len = msgdsize(mp6_conn_ctl->b_cont); 6073 qreply(q, mp6_conn_ctl); 6074 6075 /* table of MLP attributes... */ 6076 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 6077 sizeof (struct T_optmgmt_ack)]; 6078 optp->level = MIB2_UDP6; 6079 optp->name = EXPER_XPORT_MLP; 6080 optp->len = msgdsize(mp6_attr_ctl->b_cont); 6081 if (optp->len == 0) 6082 freemsg(mp6_attr_ctl); 6083 else 6084 qreply(q, mp6_attr_ctl); 6085 6086 return (1); 6087 } 6088 6089 /* 6090 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 6091 * NOTE: Per MIB-II, UDP has no writable data. 6092 * TODO: If this ever actually tries to set anything, it needs to be 6093 * to do the appropriate locking. 
6094 */ 6095 /* ARGSUSED */ 6096 int 6097 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 6098 uchar_t *ptr, int len) 6099 { 6100 switch (level) { 6101 case MIB2_UDP: 6102 return (0); 6103 default: 6104 return (1); 6105 } 6106 } 6107 6108 static void 6109 udp_report_item(mblk_t *mp, udp_t *udp) 6110 { 6111 char *state; 6112 char addrbuf1[INET6_ADDRSTRLEN]; 6113 char addrbuf2[INET6_ADDRSTRLEN]; 6114 uint_t print_len, buf_len; 6115 6116 buf_len = mp->b_datap->db_lim - mp->b_wptr; 6117 ASSERT(buf_len >= 0); 6118 if (buf_len == 0) 6119 return; 6120 6121 if (udp->udp_state == TS_UNBND) 6122 state = "UNBOUND"; 6123 else if (udp->udp_state == TS_IDLE) 6124 state = "IDLE"; 6125 else if (udp->udp_state == TS_DATA_XFER) 6126 state = "CONNECTED"; 6127 else 6128 state = "UnkState"; 6129 print_len = snprintf((char *)mp->b_wptr, buf_len, 6130 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 6131 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 6132 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 6133 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 6134 ntohs(udp->udp_dstport), state); 6135 if (print_len < buf_len) { 6136 mp->b_wptr += print_len; 6137 } else { 6138 mp->b_wptr += buf_len; 6139 } 6140 } 6141 6142 /* Report for ndd "udp_status" */ 6143 /* ARGSUSED */ 6144 static int 6145 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 6146 { 6147 zoneid_t zoneid; 6148 connf_t *connfp; 6149 conn_t *connp = Q_TO_CONN(q); 6150 udp_t *udp = connp->conn_udp; 6151 int i; 6152 udp_stack_t *us = udp->udp_us; 6153 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6154 6155 /* 6156 * Because of the ndd constraint, at most we can have 64K buffer 6157 * to put in all UDP info. So to be more efficient, just 6158 * allocate a 64K buffer here, assuming we need that large buffer. 6159 * This may be a problem as any user can read udp_status. 
Therefore 6160 * we limit the rate of doing this using us_ndd_get_info_interval. 6161 * This should be OK as normal users should not do this too often. 6162 */ 6163 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 6164 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 6165 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 6166 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 6167 return (0); 6168 } 6169 } 6170 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 6171 /* The following may work even if we cannot get a large buf. */ 6172 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 6173 return (0); 6174 } 6175 (void) mi_mpprintf(mp, 6176 "UDP " MI_COL_HDRPAD_STR 6177 /* 12345678[89ABCDEF] */ 6178 " zone lport src addr dest addr port state"); 6179 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 6180 6181 zoneid = connp->conn_zoneid; 6182 6183 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 6184 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 6185 connp = NULL; 6186 6187 while ((connp = ipcl_get_next_conn(connfp, connp, 6188 IPCL_UDP))) { 6189 udp = connp->conn_udp; 6190 if (zoneid != GLOBAL_ZONEID && 6191 zoneid != connp->conn_zoneid) 6192 continue; 6193 6194 udp_report_item(mp->b_cont, udp); 6195 } 6196 } 6197 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 6198 return (0); 6199 } 6200 6201 /* 6202 * This routine creates a T_UDERROR_IND message and passes it upstream. 6203 * The address and options are copied from the T_UNITDATA_REQ message 6204 * passed in mp. This message is freed. 
6205 */ 6206 static void 6207 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 6208 t_scalar_t err) 6209 { 6210 struct T_unitdata_req *tudr; 6211 mblk_t *mp1; 6212 uchar_t *optaddr; 6213 t_scalar_t optlen; 6214 6215 if (DB_TYPE(mp) == M_DATA) { 6216 ASSERT(destaddr != NULL && destlen != 0); 6217 optaddr = NULL; 6218 optlen = 0; 6219 } else { 6220 if ((mp->b_wptr < mp->b_rptr) || 6221 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 6222 goto done; 6223 } 6224 tudr = (struct T_unitdata_req *)mp->b_rptr; 6225 destaddr = mp->b_rptr + tudr->DEST_offset; 6226 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 6227 destaddr + tudr->DEST_length < mp->b_rptr || 6228 destaddr + tudr->DEST_length > mp->b_wptr) { 6229 goto done; 6230 } 6231 optaddr = mp->b_rptr + tudr->OPT_offset; 6232 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 6233 optaddr + tudr->OPT_length < mp->b_rptr || 6234 optaddr + tudr->OPT_length > mp->b_wptr) { 6235 goto done; 6236 } 6237 destlen = tudr->DEST_length; 6238 optlen = tudr->OPT_length; 6239 } 6240 6241 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 6242 (char *)optaddr, optlen, err); 6243 if (mp1 != NULL) 6244 putnext(UDP_RD(q), mp1); 6245 6246 done: 6247 freemsg(mp); 6248 } 6249 6250 /* 6251 * This routine removes a port number association from a stream. It 6252 * is called by udp_wput to handle T_UNBIND_REQ messages. 6253 */ 6254 static void 6255 udp_unbind(queue_t *q, mblk_t *mp) 6256 { 6257 udp_t *udp = Q_TO_UDP(q); 6258 6259 /* If a bind has not been done, we can't unbind. 
*/ 6260 if (udp->udp_state == TS_UNBND) { 6261 udp_err_ack(q, mp, TOUTSTATE, 0); 6262 return; 6263 } 6264 if (cl_inet_unbind != NULL) { 6265 /* 6266 * Running in cluster mode - register unbind information 6267 */ 6268 if (udp->udp_ipversion == IPV4_VERSION) { 6269 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 6270 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 6271 (in_port_t)udp->udp_port); 6272 } else { 6273 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 6274 (uint8_t *)&(udp->udp_v6src), 6275 (in_port_t)udp->udp_port); 6276 } 6277 } 6278 6279 udp_bind_hash_remove(udp, B_FALSE); 6280 V6_SET_ZERO(udp->udp_v6src); 6281 V6_SET_ZERO(udp->udp_bound_v6src); 6282 udp->udp_port = 0; 6283 udp->udp_state = TS_UNBND; 6284 6285 if (udp->udp_family == AF_INET6) { 6286 int error; 6287 6288 /* Rebuild the header template */ 6289 error = udp_build_hdrs(q, udp); 6290 if (error != 0) { 6291 udp_err_ack(q, mp, TSYSERR, error); 6292 return; 6293 } 6294 } 6295 /* 6296 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 6297 * and therefore ip_unbind must never return NULL. 6298 */ 6299 mp = ip_unbind(q, mp); 6300 ASSERT(mp != NULL); 6301 putnext(UDP_RD(q), mp); 6302 } 6303 6304 /* 6305 * Don't let port fall into the privileged range. 6306 * Since the extra privileged ports can be arbitrary we also 6307 * ensure that we exclude those from consideration. 6308 * us->us_epriv_ports is not sorted thus we loop over it until 6309 * there are no changes. 6310 */ 6311 static in_port_t 6312 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 6313 { 6314 int i; 6315 in_port_t nextport; 6316 boolean_t restart = B_FALSE; 6317 udp_stack_t *us = udp->udp_us; 6318 6319 if (random && udp_random_anon_port != 0) { 6320 (void) random_get_pseudo_bytes((uint8_t *)&port, 6321 sizeof (in_port_t)); 6322 /* 6323 * Unless changed by a sys admin, the smallest anon port 6324 * is 32768 and the largest anon port is 65535. 
It is 6325 * very likely (50%) for the random port to be smaller 6326 * than the smallest anon port. When that happens, 6327 * add port % (anon port range) to the smallest anon 6328 * port to get the random port. It should fall into the 6329 * valid anon port range. 6330 */ 6331 if (port < us->us_smallest_anon_port) { 6332 port = us->us_smallest_anon_port + 6333 port % (us->us_largest_anon_port - 6334 us->us_smallest_anon_port); 6335 } 6336 } 6337 6338 retry: 6339 if (port < us->us_smallest_anon_port) 6340 port = us->us_smallest_anon_port; 6341 6342 if (port > us->us_largest_anon_port) { 6343 port = us->us_smallest_anon_port; 6344 if (restart) 6345 return (0); 6346 restart = B_TRUE; 6347 } 6348 6349 if (port < us->us_smallest_nonpriv_port) 6350 port = us->us_smallest_nonpriv_port; 6351 6352 for (i = 0; i < us->us_num_epriv_ports; i++) { 6353 if (port == us->us_epriv_ports[i]) { 6354 port++; 6355 /* 6356 * Make sure that the port is in the 6357 * valid range. 6358 */ 6359 goto retry; 6360 } 6361 } 6362 6363 if (is_system_labeled() && 6364 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 6365 port, IPPROTO_UDP, B_TRUE)) != 0) { 6366 port = nextport; 6367 goto retry; 6368 } 6369 6370 return (port); 6371 } 6372 6373 static int 6374 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 6375 { 6376 int err; 6377 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 6378 udp_t *udp = Q_TO_UDP(wq); 6379 6380 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 6381 opt_storage, udp->udp_mac_exempt, 6382 udp->udp_us->us_netstack->netstack_ip); 6383 if (err == 0) { 6384 err = tsol_update_options(&udp->udp_ip_snd_options, 6385 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 6386 opt_storage); 6387 } 6388 if (err != 0) { 6389 DTRACE_PROBE4( 6390 tx__ip__log__info__updatelabel__udp, 6391 char *, "queue(1) failed to update options(2) on mp(3)", 6392 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6393 } else { 6394 IN6_IPADDR_TO_V4MAPPED(dst, 
&udp->udp_v6lastdst); 6395 } 6396 return (err); 6397 } 6398 6399 static mblk_t * 6400 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 6401 uint_t srcid, int *error, boolean_t insert_spi) 6402 { 6403 udp_t *udp = connp->conn_udp; 6404 queue_t *q = connp->conn_wq; 6405 mblk_t *mp1 = mp; 6406 mblk_t *mp2; 6407 ipha_t *ipha; 6408 int ip_hdr_length; 6409 uint32_t ip_len; 6410 udpha_t *udpha; 6411 udpattrs_t attrs; 6412 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 6413 uint32_t ip_snd_opt_len = 0; 6414 ip4_pkt_t pktinfo; 6415 ip4_pkt_t *pktinfop = &pktinfo; 6416 ip_opt_info_t optinfo; 6417 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6418 udp_stack_t *us = udp->udp_us; 6419 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 6420 6421 6422 *error = 0; 6423 pktinfop->ip4_ill_index = 0; 6424 pktinfop->ip4_addr = INADDR_ANY; 6425 optinfo.ip_opt_flags = 0; 6426 optinfo.ip_opt_ill_index = 0; 6427 6428 if (v4dst == INADDR_ANY) 6429 v4dst = htonl(INADDR_LOOPBACK); 6430 6431 /* 6432 * If options passed in, feed it for verification and handling 6433 */ 6434 attrs.udpattr_credset = B_FALSE; 6435 if (DB_TYPE(mp) != M_DATA) { 6436 mp1 = mp->b_cont; 6437 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 6438 attrs.udpattr_ipp4 = pktinfop; 6439 attrs.udpattr_mb = mp; 6440 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 6441 goto done; 6442 /* 6443 * Note: success in processing options. 6444 * mp option buffer represented by 6445 * OPT_length/offset now potentially modified 6446 * and contain option setting results 6447 */ 6448 ASSERT(*error == 0); 6449 } 6450 } 6451 6452 /* mp1 points to the M_DATA mblk carrying the packet */ 6453 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6454 6455 /* 6456 * Check if our saved options are valid; update if not. 
6457 * TSOL Note: Since we are not in WRITER mode, UDP packets 6458 * to different destination may require different labels, 6459 * or worse, UDP packets to same IP address may require 6460 * different labels due to use of shared all-zones address. 6461 * We use conn_lock to ensure that lastdst, ip_snd_options, 6462 * and ip_snd_options_len are consistent for the current 6463 * destination and are updated atomically. 6464 */ 6465 mutex_enter(&connp->conn_lock); 6466 if (is_system_labeled()) { 6467 /* Using UDP MLP requires SCM_UCRED from user */ 6468 if (connp->conn_mlp_type != mlptSingle && 6469 !attrs.udpattr_credset) { 6470 mutex_exit(&connp->conn_lock); 6471 DTRACE_PROBE4( 6472 tx__ip__log__info__output__udp, 6473 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6474 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6475 *error = ECONNREFUSED; 6476 goto done; 6477 } 6478 /* 6479 * update label option for this UDP socket if 6480 * - the destination has changed, or 6481 * - the UDP socket is MLP 6482 */ 6483 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 6484 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 6485 connp->conn_mlp_type != mlptSingle) && 6486 (*error = udp_update_label(q, mp, v4dst)) != 0) { 6487 mutex_exit(&connp->conn_lock); 6488 goto done; 6489 } 6490 } 6491 if (udp->udp_ip_snd_options_len > 0) { 6492 ip_snd_opt_len = udp->udp_ip_snd_options_len; 6493 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 6494 } 6495 mutex_exit(&connp->conn_lock); 6496 6497 /* Add an IP header */ 6498 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 6499 (insert_spi ? 
sizeof (uint32_t) : 0); 6500 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 6501 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 6502 !OK_32PTR(ipha)) { 6503 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 6504 if (mp2 == NULL) { 6505 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6506 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 6507 *error = ENOMEM; 6508 goto done; 6509 } 6510 mp2->b_wptr = DB_LIM(mp2); 6511 mp2->b_cont = mp1; 6512 mp1 = mp2; 6513 if (DB_TYPE(mp) != M_DATA) 6514 mp->b_cont = mp1; 6515 else 6516 mp = mp1; 6517 6518 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 6519 } 6520 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0)); 6521 #ifdef _BIG_ENDIAN 6522 /* Set version, header length, and tos */ 6523 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 6524 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 6525 udp->udp_type_of_service); 6526 /* Set ttl and protocol */ 6527 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 6528 #else 6529 /* Set version, header length, and tos */ 6530 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 6531 ((udp->udp_type_of_service << 8) | 6532 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 6533 /* Set ttl and protocol */ 6534 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 6535 #endif 6536 if (pktinfop->ip4_addr != INADDR_ANY) { 6537 ipha->ipha_src = pktinfop->ip4_addr; 6538 optinfo.ip_opt_flags = IP_VERIFY_SRC; 6539 } else { 6540 /* 6541 * Copy our address into the packet. If this is zero, 6542 * first look at __sin6_src_id for a hint. If we leave the 6543 * source as INADDR_ANY then ip will fill in the real source 6544 * address. 
6545 */ 6546 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 6547 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 6548 in6_addr_t v6src; 6549 6550 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 6551 us->us_netstack); 6552 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 6553 } 6554 } 6555 6556 if (pktinfop->ip4_ill_index != 0) { 6557 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 6558 } 6559 6560 ipha->ipha_fragment_offset_and_flags = 0; 6561 ipha->ipha_ident = 0; 6562 6563 mp1->b_rptr = (uchar_t *)ipha; 6564 6565 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 6566 (uintptr_t)UINT_MAX); 6567 6568 /* Determine length of packet */ 6569 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 6570 if ((mp2 = mp1->b_cont) != NULL) { 6571 do { 6572 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6573 ip_len += (uint32_t)MBLKL(mp2); 6574 } while ((mp2 = mp2->b_cont) != NULL); 6575 } 6576 /* 6577 * If the size of the packet is greater than the maximum allowed by 6578 * ip, return an error. Passing this down could cause panics because 6579 * the size will have wrapped and be inconsistent with the msg size. 6580 */ 6581 if (ip_len > IP_MAXPACKET) { 6582 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6583 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 6584 *error = EMSGSIZE; 6585 goto done; 6586 } 6587 ipha->ipha_length = htons((uint16_t)ip_len); 6588 ip_len -= ip_hdr_length; 6589 ip_len = htons((uint16_t)ip_len); 6590 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 6591 6592 /* Insert all-0s SPI now. */ 6593 if (insert_spi) 6594 *((uint32_t *)(udpha + 1)) = 0; 6595 6596 /* 6597 * Copy in the destination address 6598 */ 6599 ipha->ipha_dst = v4dst; 6600 6601 /* 6602 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 
6603 */ 6604 if (CLASSD(v4dst)) 6605 ipha->ipha_ttl = udp->udp_multicast_ttl; 6606 6607 udpha->uha_dst_port = port; 6608 udpha->uha_src_port = udp->udp_port; 6609 6610 if (ip_snd_opt_len > 0) { 6611 uint32_t cksum; 6612 6613 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 6614 /* 6615 * Massage source route putting first source route in ipha_dst. 6616 * Ignore the destination in T_unitdata_req. 6617 * Create a checksum adjustment for a source route, if any. 6618 */ 6619 cksum = ip_massage_options(ipha, us->us_netstack); 6620 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6621 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 6622 (ipha->ipha_dst & 0xFFFF); 6623 if ((int)cksum < 0) 6624 cksum--; 6625 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6626 /* 6627 * IP does the checksum if uha_checksum is non-zero, 6628 * We make it easy for IP to include our pseudo header 6629 * by putting our length in uha_checksum. 6630 */ 6631 cksum += ip_len; 6632 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6633 /* There might be a carry. */ 6634 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6635 #ifdef _LITTLE_ENDIAN 6636 if (us->us_do_checksum) 6637 ip_len = (cksum << 16) | ip_len; 6638 #else 6639 if (us->us_do_checksum) 6640 ip_len = (ip_len << 16) | cksum; 6641 else 6642 ip_len <<= 16; 6643 #endif 6644 } else { 6645 /* 6646 * IP does the checksum if uha_checksum is non-zero, 6647 * We make it easy for IP to include our pseudo header 6648 * by putting our length in uha_checksum. 6649 */ 6650 if (us->us_do_checksum) 6651 ip_len |= (ip_len << 16); 6652 #ifndef _LITTLE_ENDIAN 6653 else 6654 ip_len <<= 16; 6655 #endif 6656 } 6657 6658 /* Set UDP length and checksum */ 6659 *((uint32_t *)&udpha->uha_length) = ip_len; 6660 if (DB_CRED(mp) != NULL) 6661 mblk_setcred(mp1, DB_CRED(mp)); 6662 6663 if (DB_TYPE(mp) != M_DATA) { 6664 ASSERT(mp != mp1); 6665 freeb(mp); 6666 } 6667 6668 /* mp has been consumed and we'll return success */ 6669 ASSERT(*error == 0); 6670 mp = NULL; 6671 6672 /* We're done. 
Pass the packet to ip. */
	BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
	    "udp_wput_end: q %p (%S)", q, "end");

	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
	    CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) ||
	    connp->conn_dontroute || connp->conn_xmit_if_ill != NULL ||
	    connp->conn_nofailover_ill != NULL ||
	    connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 ||
	    optinfo.ip_opt_ill_index != 0 ||
	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
	    IPP_ENABLED(IPP_LOCAL_OUT, ipst) ||
	    ipst->ips_ip_g_mrouter != NULL) {
		UDP_STAT(us, udp_ip_send);
		ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT,
		    &optinfo);
	} else {
		udp_send_data(udp, connp->conn_wq, mp1, ipha);
	}

done:
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&udp->udp_mib, udpOutErrors);
	}
	return (mp);
}

/*
 * udp_send_data():
 *	Transmit an IPv4 UDP packet whose IP and UDP headers have already
 *	been built; ipha points at the IP header inside mp and ipha_ident
 *	must still be zero on entry.
 *
 *	The routine tries to send the packet straight to the driver below
 *	the IRE's send queue:
 *	 - it uses the IRE cached in connp->conn_ire_cache when that entry
 *	   still matches the destination, otherwise it looks one up and
 *	   tries to cache it;
 *	 - if the IRE qualifies for the fast path it fills in the IP ident
 *	   and (if unset) the source address, finishes the UDP and IP header
 *	   checksums, prepends the link-layer header held in nce_fp_mp and
 *	   passes the packet down.
 *	Whenever one of the fast-path preconditions does not hold the packet
 *	is handed to ip_output() unchanged.
 *
 *	The message is always consumed: it is sent, queued on q, passed to
 *	ip_output(), or freed.  Every exit releases the ipif hold taken for a
 *	multicast destination and the IRE reference taken here.
 */
static void
udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
{
	conn_t	*connp = udp->udp_connp;
	ipaddr_t src, dst;
	ill_t	*ill;
	ire_t	*ire;
	ipif_t	*ipif = NULL;
	mblk_t	*ire_fp_mp;
	uint_t	ire_fp_mp_len;
	uint16_t *up;
	uint32_t cksum, hcksum_txflags;
	queue_t	*dev_q;
	boolean_t retry_caching;
	udp_stack_t *us = udp->udp_us;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;
	ASSERT(ipha->ipha_ident == 0);

	/*
	 * For a multicast destination take a hold on the connection's
	 * multicast ipif.  Without a usable IPv4, non-loopback ipif the
	 * packet is left to ip_output().
	 */
	if (CLASSD(dst)) {
		int err;

		ipif = conn_get_held_ipif(connp,
		    &connp->conn_multicast_ipif, &err);

		if (ipif == NULL || ipif->ipif_isv6 ||
		    (ipif->ipif_ill->ill_phyint->phyint_flags &
		    PHYI_LOOPBACK)) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(us, udp_ip_send);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
	}

	/*
	 * Decide, under conn_lock, whether the cached IRE can be reused:
	 * it must exist, match the destination and not be condemned, and
	 * for multicast its send-queue ill must be the ipif's ill or in the
	 * same ill group.
	 */
	retry_caching = B_FALSE;
	mutex_enter(&connp->conn_lock);
	ire = connp->conn_ire_cache;
	ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));

	if (ire == NULL || ire->ire_addr != dst ||
	    (ire->ire_marks & IRE_MARK_CONDEMNED)) {
		retry_caching = B_TRUE;
	} else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) {
		ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr;

		ASSERT(ipif != NULL);
		if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL ||
		    stq_ill->ill_group != ipif->ipif_ill->ill_group))
			retry_caching = B_TRUE;
	}

	if (!retry_caching) {
		/* Cached IRE is usable; take our own reference on it. */
		ASSERT(ire != NULL);
		IRE_REFHOLD(ire);
		mutex_exit(&connp->conn_lock);
	} else {
		boolean_t cached = B_FALSE;

		/* Detach the stale entry before dropping the lock. */
		connp->conn_ire_cache = NULL;
		mutex_exit(&connp->conn_lock);

		/* Release the old ire */
		if (ire != NULL) {
			IRE_REFRELE_NOTR(ire);
			ire = NULL;
		}

		if (CLASSD(dst)) {
			ASSERT(ipif != NULL);
			ire = ire_ctable_lookup(dst, 0, 0, ipif,
			    connp->conn_zoneid, MBLK_GETLABEL(mp),
			    MATCH_IRE_ILL_GROUP, ipst);
		} else {
			ASSERT(ipif == NULL);
			ire = ire_cache_lookup(dst, connp->conn_zoneid,
			    MBLK_GETLABEL(mp), ipst);
		}

		if (ire == NULL) {
			/* No IRE for this destination; let IP handle it. */
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(us, udp_ire_null);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
		/* Extra reference intended for the conn's cache slot. */
		IRE_REFHOLD_NOTR(ire);

		mutex_enter(&connp->conn_lock);
		if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL) {
			rw_enter(&ire->ire_bucket->irb_lock, RW_READER);
			if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) {
				connp->conn_ire_cache = ire;
				cached = B_TRUE;
			}
			rw_exit(&ire->ire_bucket->irb_lock);
		}
		mutex_exit(&connp->conn_lock);

		/*
		 * We can continue to use the ire but since it was not
		 * cached, we should drop the extra reference.
		 */
		if (!cached)
			IRE_REFRELE_NOTR(ire);
	}
	ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION);
	ASSERT(!CLASSD(dst) || ipif != NULL);

	/*
	 * Check if we can take the fast-path.
	 * Note that "incomplete" ire's (where the link-layer for next hop
	 * is not resolved, or where the fast-path header in nce_fp_mp is not
	 * available yet) are sent down the legacy (slow) path
	 */
	if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
	    (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) ||
	    (ire->ire_max_frag < ntohs(ipha->ipha_length)) ||
	    (connp->conn_nexthop_set) ||
	    (ire->ire_nce == NULL) ||
	    ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) ||
	    ((ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp))) {
		if (ipif != NULL)
			ipif_refrele(ipif);
		UDP_STAT(us, udp_ip_ire_send);
		IRE_REFRELE(ire);
		ip_output(connp, mp, q, IP_WPUT);
		return;
	}

	ill = ire_to_ill(ire);
	ASSERT(ill != NULL);

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);

	dev_q = ire->ire_stq->q_next;
	ASSERT(dev_q != NULL);
	/*
	 * If the service thread is already running, or if the driver
	 * queue is currently flow-controlled, queue this packet.
	 * (When ips_ip_output_queue is not set the packet is dropped and
	 * counted as an output discard instead.)
	 */
	if ((q->q_first != NULL || connp->conn_draining) ||
	    ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) {
		if (ipst->ips_ip_output_queue) {
			(void) putq(q, mp);
		} else {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			freemsg(mp);
		}
		if (ipif != NULL)
			ipif_refrele(ipif);
		IRE_REFRELE(ire);
		return;
	}

	/* Take the next ident from the IRE; byte-swap it on little-endian. */
	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
#ifndef _BIG_ENDIAN
	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
#endif

	/*
	 * Fill in an unspecified source address, unless the conn asked to
	 * keep it unspecified.  Multicast without RTF_SETSRC uses the ipif's
	 * source address; everything else uses the IRE's.
	 */
	if (src == INADDR_ANY && !connp->conn_unspec_src) {
		if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
			src = ipha->ipha_src = ipif->ipif_src_addr;
		else
			src = ipha->ipha_src = ire->ire_src_addr;
	}

	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		ASSERT(ill->ill_hcksum_capab != NULL);
		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
	} else {
		hcksum_txflags = 0;
	}

	/* pseudo-header checksum (do it in parts for IP header checksum) */
	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);

	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
	/* A zero UDP checksum field means no UDP checksum was requested. */
	if (*up != 0) {
		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
		    ntohs(ipha->ipha_length), cksum);

		/* Software checksum? */
		if (DB_CKSUMFLAGS(mp) == 0) {
			UDP_STAT(us, udp_out_sw_cksum);
			UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes,
			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
		}
	}

	/* Unicast packets pick up the IRE's fragmentation flag. */
	if (!CLASSD(dst)) {
		ipha->ipha_fragment_offset_and_flags |=
		    (uint32_t)htons(ire->ire_frag_flag);
	}

	/* Calculate IP header checksum if hardware isn't capable */
	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
		    ((uint16_t *)ipha)[4]);
	}

	if (CLASSD(dst)) {
		ilm_t *ilm;

		/* Loop the packet back if this ill has a member of the group. */
		ILM_WALKER_HOLD(ill);
		ilm = ilm_lookup_ill(ill, dst, ALL_ZONES);
		ILM_WALKER_RELE(ill);
		if (ilm != NULL) {
			ip_multicast_loopback(q, ill, mp,
			    connp->conn_multicast_loop ? 0 :
			    IP_FF_NO_MCAST_LOOP, connp->conn_zoneid);
		}

		/* If multicast TTL is 0 then we are done */
		if (ipha->ipha_ttl == 0) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			freemsg(mp);
			IRE_REFRELE(ire);
			return;
		}
	}

	/*
	 * Prepend the link-layer header.  The fast-path test above already
	 * verified that it fits in the headroom of mp.
	 */
	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);

	UPDATE_OB_PKT_COUNT(ire);
	ire->ire_last_used_time = lbolt;

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits);
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
	    ntohs(ipha->ipha_length));

	if (ILL_DLS_CAPABLE(ill)) {
		/*
		 * Send the packet directly to DLD, where it may be queued
		 * depending on the availability of transmit resources at
		 * the media layer.
		 */
		IP_DLS_ILL_TX(ill, ipha, mp, ipst);
	} else {
		DTRACE_PROBE4(ip4__physical__out__start,
		    ill_t *, NULL, ill_t *, ill,
		    ipha_t *, ipha, mblk_t *, mp);
		FW_HOOKS(ipst->ips_ip4_physical_out_event,
		    ipst->ips_ipv4firewall_physical_out,
		    NULL, ill, ipha, mp, mp, ipst);
		DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
		/* mp is re-checked here: the hooks may have left it NULL. */
		if (mp != NULL)
			putnext(ire->ire_stq, mp);
	}

	if (ipif != NULL)
		ipif_refrele(ipif);
	IRE_REFRELE(ire);
}

/*
 * udp_update_label_v6():
 *	Recompute the label option for the IPv6 destination dst and install
 *	it in the endpoint's sticky options.  tsol_compute_label_v6() builds
 *	the option into opt_storage using the credential selected by
 *	DB_CREDDEF(mp, conn_cred) (presumably the message's credential with
 *	the connection's as default -- confirm against the macro), and
 *	tsol_update_sticky() then updates udp_sticky_ipp and
 *	udp_label_len_v6.  On success dst is remembered in udp_v6lastdst; on
 *	failure a DTrace probe is fired and udp_v6lastdst is left alone.
 *
 *	NOTE: although the function is declared boolean_t, the value
 *	returned is the error code from the tsol_*() calls: 0 on success,
 *	non-zero on failure.  udp_output_v6() stores the result in *error
 *	and calls this routine with conn_lock held.
 */
static boolean_t
udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst)
{
	udp_t *udp = Q_TO_UDP(wq);
	int err;
	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];

	err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred),
	    dst, opt_storage, udp->udp_mac_exempt,
	    udp->udp_us->us_netstack->netstack_ip);
	if (err == 0) {
		err = tsol_update_sticky(&udp->udp_sticky_ipp,
		    &udp->udp_label_len_v6, opt_storage);
	}
	if (err != 0) {
		DTRACE_PROBE4(
		    tx__ip__log__drop__updatelabel__udp6,
		    char *, "queue(1) failed to update options(2) on mp(3)",
		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
	} else {
		udp->udp_v6lastdst = *dst;
	}
	return (err);
}

/*
 * This routine handles all messages passed downstream.  It either
 * consumes the message or passes it downstream; it never queues a
 * message.
6993 */ 6994 static void 6995 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6996 { 6997 sin6_t *sin6; 6998 sin_t *sin; 6999 ipaddr_t v4dst; 7000 uint16_t port; 7001 uint_t srcid; 7002 queue_t *q = connp->conn_wq; 7003 udp_t *udp = connp->conn_udp; 7004 int error = 0; 7005 struct sockaddr_storage ss; 7006 udp_stack_t *us = udp->udp_us; 7007 boolean_t insert_spi = udp->udp_nat_t_endpoint; 7008 7009 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 7010 "udp_wput_start: connp %p mp %p", connp, mp); 7011 7012 /* 7013 * We directly handle several cases here: T_UNITDATA_REQ message 7014 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 7015 * connected and non-connected socket. The latter carries the 7016 * address structure along when this routine gets called. 7017 */ 7018 switch (DB_TYPE(mp)) { 7019 case M_DATA: 7020 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 7021 if (!udp->udp_direct_sockfs || 7022 addr == NULL || addrlen == 0) { 7023 /* Not connected; address is required */ 7024 BUMP_MIB(&udp->udp_mib, udpOutErrors); 7025 UDP_STAT(us, udp_out_err_notconn); 7026 freemsg(mp); 7027 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7028 "udp_wput_end: connp %p (%S)", connp, 7029 "not-connected; address required"); 7030 return; 7031 } 7032 ASSERT(udp->udp_issocket); 7033 UDP_DBGSTAT(us, udp_data_notconn); 7034 /* Not connected; do some more checks below */ 7035 break; 7036 } 7037 /* M_DATA for connected socket */ 7038 UDP_DBGSTAT(us, udp_data_conn); 7039 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 7040 7041 /* Initialize addr and addrlen as if they're passed in */ 7042 if (udp->udp_family == AF_INET) { 7043 sin = (sin_t *)&ss; 7044 sin->sin_family = AF_INET; 7045 sin->sin_port = udp->udp_dstport; 7046 sin->sin_addr.s_addr = v4dst; 7047 addr = (struct sockaddr *)sin; 7048 addrlen = sizeof (*sin); 7049 } else { 7050 sin6 = (sin6_t *)&ss; 7051 sin6->sin6_family = AF_INET6; 7052 sin6->sin6_port = udp->udp_dstport; 7053 
sin6->sin6_flowinfo = udp->udp_flowinfo; 7054 sin6->sin6_addr = udp->udp_v6dst; 7055 sin6->sin6_scope_id = 0; 7056 sin6->__sin6_src_id = 0; 7057 addr = (struct sockaddr *)sin6; 7058 addrlen = sizeof (*sin6); 7059 } 7060 7061 if (udp->udp_family == AF_INET || 7062 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 7063 /* 7064 * Handle both AF_INET and AF_INET6; the latter 7065 * for IPV4 mapped destination addresses. Note 7066 * here that both addr and addrlen point to the 7067 * corresponding struct depending on the address 7068 * family of the socket. 7069 */ 7070 mp = udp_output_v4(connp, mp, v4dst, 7071 udp->udp_dstport, 0, &error, insert_spi); 7072 } else { 7073 mp = udp_output_v6(connp, mp, sin6, &error); 7074 } 7075 if (error != 0) { 7076 ASSERT(addr != NULL && addrlen != 0); 7077 goto ud_error; 7078 } 7079 return; 7080 case M_PROTO: 7081 case M_PCPROTO: { 7082 struct T_unitdata_req *tudr; 7083 7084 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 7085 tudr = (struct T_unitdata_req *)mp->b_rptr; 7086 7087 /* Handle valid T_UNITDATA_REQ here */ 7088 if (MBLKL(mp) >= sizeof (*tudr) && 7089 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 7090 if (mp->b_cont == NULL) { 7091 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7092 "udp_wput_end: q %p (%S)", q, "badaddr"); 7093 error = EPROTO; 7094 goto ud_error; 7095 } 7096 7097 if (!MBLKIN(mp, 0, tudr->DEST_offset + 7098 tudr->DEST_length)) { 7099 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7100 "udp_wput_end: q %p (%S)", q, "badaddr"); 7101 error = EADDRNOTAVAIL; 7102 goto ud_error; 7103 } 7104 /* 7105 * If a port has not been bound to the stream, fail. 7106 * This is not a problem when sockfs is directly 7107 * above us, because it will ensure that the socket 7108 * is first bound before allowing data to be sent. 
7109 */ 7110 if (udp->udp_state == TS_UNBND) { 7111 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7112 "udp_wput_end: q %p (%S)", q, "outstate"); 7113 error = EPROTO; 7114 goto ud_error; 7115 } 7116 addr = (struct sockaddr *) 7117 &mp->b_rptr[tudr->DEST_offset]; 7118 addrlen = tudr->DEST_length; 7119 if (tudr->OPT_length != 0) 7120 UDP_STAT(us, udp_out_opt); 7121 break; 7122 } 7123 /* FALLTHRU */ 7124 } 7125 default: 7126 udp_become_writer(connp, mp, udp_wput_other_wrapper, 7127 SQTAG_UDP_OUTPUT); 7128 return; 7129 } 7130 ASSERT(addr != NULL); 7131 7132 switch (udp->udp_family) { 7133 case AF_INET6: 7134 sin6 = (sin6_t *)addr; 7135 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 7136 sin6->sin6_family != AF_INET6) { 7137 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7138 "udp_wput_end: q %p (%S)", q, "badaddr"); 7139 error = EADDRNOTAVAIL; 7140 goto ud_error; 7141 } 7142 7143 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 7144 /* 7145 * Destination is a non-IPv4-compatible IPv6 address. 7146 * Send out an IPv6 format packet. 7147 */ 7148 mp = udp_output_v6(connp, mp, sin6, &error); 7149 if (error != 0) 7150 goto ud_error; 7151 7152 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7153 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 7154 return; 7155 } 7156 /* 7157 * If the local address is not zero or a mapped address 7158 * return an error. It would be possible to send an IPv4 7159 * packet but the response would never make it back to the 7160 * application since it is bound to a non-mapped address. 
7161 */ 7162 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 7163 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7164 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7165 "udp_wput_end: q %p (%S)", q, "badaddr"); 7166 error = EADDRNOTAVAIL; 7167 goto ud_error; 7168 } 7169 /* Send IPv4 packet without modifying udp_ipversion */ 7170 /* Extract port and ipaddr */ 7171 port = sin6->sin6_port; 7172 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 7173 srcid = sin6->__sin6_src_id; 7174 break; 7175 7176 case AF_INET: 7177 sin = (sin_t *)addr; 7178 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 7179 sin->sin_family != AF_INET) { 7180 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 7181 "udp_wput_end: q %p (%S)", q, "badaddr"); 7182 error = EADDRNOTAVAIL; 7183 goto ud_error; 7184 } 7185 /* Extract port and ipaddr */ 7186 port = sin->sin_port; 7187 v4dst = sin->sin_addr.s_addr; 7188 srcid = 0; 7189 break; 7190 } 7191 7192 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi); 7193 if (error != 0) { 7194 ud_error: 7195 UDP_STAT(us, udp_out_err_output); 7196 ASSERT(mp != NULL); 7197 /* mp is freed by the following routine */ 7198 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 7199 (t_scalar_t)error); 7200 } 7201 } 7202 7203 /* ARGSUSED */ 7204 static void 7205 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 7206 { 7207 udp_output((conn_t *)arg, mp, NULL, 0); 7208 _UDP_EXIT((conn_t *)arg); 7209 } 7210 7211 static void 7212 udp_wput(queue_t *q, mblk_t *mp) 7213 { 7214 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 7215 SQTAG_UDP_WPUT); 7216 } 7217 7218 /* 7219 * Allocate and prepare a T_UNITDATA_REQ message. 
7220 */ 7221 static mblk_t * 7222 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 7223 { 7224 struct T_unitdata_req *tudr; 7225 mblk_t *mp; 7226 7227 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 7228 if (mp != NULL) { 7229 mp->b_wptr += sizeof (*tudr) + addrlen; 7230 DB_TYPE(mp) = M_PROTO; 7231 7232 tudr = (struct T_unitdata_req *)mp->b_rptr; 7233 tudr->PRIM_type = T_UNITDATA_REQ; 7234 tudr->DEST_length = addrlen; 7235 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 7236 tudr->OPT_length = 0; 7237 tudr->OPT_offset = 0; 7238 bcopy(addr, tudr+1, addrlen); 7239 } 7240 return (mp); 7241 } 7242 7243 /* 7244 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 7245 * is valid when we are directly beneath the stream head, and thus sockfs 7246 * is able to bypass STREAMS and directly call us, passing along the sockaddr 7247 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 7248 * this is done for both connected and non-connected endpoint. 7249 */ 7250 void 7251 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 7252 { 7253 conn_t *connp; 7254 udp_t *udp; 7255 udp_stack_t *us; 7256 7257 q = UDP_WR(q); 7258 connp = Q_TO_CONN(q); 7259 udp = connp->conn_udp; 7260 us = udp->udp_us; 7261 7262 /* udpsockfs should only send down M_DATA for this entry point */ 7263 ASSERT(DB_TYPE(mp) == M_DATA); 7264 7265 mutex_enter(&connp->conn_lock); 7266 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 7267 7268 if (udp->udp_mode != UDP_MT_HOT) { 7269 /* 7270 * We can't enter this conn right away because another 7271 * thread is currently executing as writer; therefore we 7272 * need to deposit the message into the squeue to be 7273 * drained later. If a socket address is present, we 7274 * need to create a T_UNITDATA_REQ message as placeholder. 
7275 */ 7276 if (addr != NULL && addrlen != 0) { 7277 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 7278 7279 if (tudr_mp == NULL) { 7280 mutex_exit(&connp->conn_lock); 7281 BUMP_MIB(&udp->udp_mib, udpOutErrors); 7282 UDP_STAT(us, udp_out_err_tudr); 7283 freemsg(mp); 7284 return; 7285 } 7286 /* Tag the packet with T_UNITDATA_REQ */ 7287 tudr_mp->b_cont = mp; 7288 mp = tudr_mp; 7289 } 7290 mutex_exit(&connp->conn_lock); 7291 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 7292 return; 7293 } 7294 7295 /* We can execute as reader right away. */ 7296 UDP_READERS_INCREF(udp); 7297 mutex_exit(&connp->conn_lock); 7298 7299 udp_output(connp, mp, addr, addrlen); 7300 7301 udp_exit(connp); 7302 } 7303 7304 /* 7305 * udp_output_v6(): 7306 * Assumes that udp_wput did some sanity checking on the destination 7307 * address. 7308 */ 7309 static mblk_t * 7310 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) 7311 { 7312 ip6_t *ip6h; 7313 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 7314 mblk_t *mp1 = mp; 7315 mblk_t *mp2; 7316 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7317 size_t ip_len; 7318 udpha_t *udph; 7319 udp_t *udp = connp->conn_udp; 7320 queue_t *q = connp->conn_wq; 7321 ip6_pkt_t ipp_s; /* For ancillary data options */ 7322 ip6_pkt_t *ipp = &ipp_s; 7323 ip6_pkt_t *tipp; /* temporary ipp */ 7324 uint32_t csum = 0; 7325 uint_t ignore = 0; 7326 uint_t option_exists = 0, is_sticky = 0; 7327 uint8_t *cp; 7328 uint8_t *nxthdr_ptr; 7329 in6_addr_t ip6_dst; 7330 udpattrs_t attrs; 7331 boolean_t opt_present; 7332 ip6_hbh_t *hopoptsptr = NULL; 7333 uint_t hopoptslen = 0; 7334 boolean_t is_ancillary = B_FALSE; 7335 udp_stack_t *us = udp->udp_us; 7336 7337 *error = 0; 7338 7339 /* 7340 * If the local address is a mapped address return 7341 * an error. 7342 * It would be possible to send an IPv6 packet but the 7343 * response would never make it back to the application 7344 * since it is bound to a mapped address. 
7345 */ 7346 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 7347 *error = EADDRNOTAVAIL; 7348 goto done; 7349 } 7350 7351 ipp->ipp_fields = 0; 7352 ipp->ipp_sticky_ignored = 0; 7353 7354 /* 7355 * If TPI options passed in, feed it for verification and handling 7356 */ 7357 attrs.udpattr_credset = B_FALSE; 7358 opt_present = B_FALSE; 7359 if (DB_TYPE(mp) != M_DATA) { 7360 mp1 = mp->b_cont; 7361 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 7362 attrs.udpattr_ipp6 = ipp; 7363 attrs.udpattr_mb = mp; 7364 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 7365 goto done; 7366 ASSERT(*error == 0); 7367 opt_present = B_TRUE; 7368 } 7369 } 7370 ignore = ipp->ipp_sticky_ignored; 7371 7372 /* mp1 points to the M_DATA mblk carrying the packet */ 7373 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 7374 7375 if (sin6->sin6_scope_id != 0 && 7376 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 7377 /* 7378 * IPPF_SCOPE_ID is special. It's neither a sticky 7379 * option nor ancillary data. It needs to be 7380 * explicitly set in options_exists. 7381 */ 7382 option_exists |= IPPF_SCOPE_ID; 7383 } 7384 7385 /* 7386 * Compute the destination address 7387 */ 7388 ip6_dst = sin6->sin6_addr; 7389 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 7390 ip6_dst = ipv6_loopback; 7391 7392 /* 7393 * If we're not going to the same destination as last time, then 7394 * recompute the label required. This is done in a separate routine to 7395 * avoid blowing up our stack here. 7396 * 7397 * TSOL Note: Since we are not in WRITER mode, UDP packets 7398 * to different destination may require different labels, 7399 * or worse, UDP packets to same IP address may require 7400 * different labels due to use of shared all-zones address. 7401 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 7402 * and sticky ipp_hopoptslen are consistent for the current 7403 * destination and are updated atomically. 
7404 */ 7405 mutex_enter(&connp->conn_lock); 7406 if (is_system_labeled()) { 7407 /* Using UDP MLP requires SCM_UCRED from user */ 7408 if (connp->conn_mlp_type != mlptSingle && 7409 !attrs.udpattr_credset) { 7410 DTRACE_PROBE4( 7411 tx__ip__log__info__output__udp6, 7412 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 7413 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 7414 *error = ECONNREFUSED; 7415 mutex_exit(&connp->conn_lock); 7416 goto done; 7417 } 7418 /* 7419 * update label option for this UDP socket if 7420 * - the destination has changed, or 7421 * - the UDP socket is MLP 7422 */ 7423 if ((opt_present || 7424 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 7425 connp->conn_mlp_type != mlptSingle) && 7426 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 7427 mutex_exit(&connp->conn_lock); 7428 goto done; 7429 } 7430 } 7431 7432 /* 7433 * If there's a security label here, then we ignore any options the 7434 * user may try to set. We keep the peer's label as a hidden sticky 7435 * option. We make a private copy of this label before releasing the 7436 * lock so that label is kept consistent with the destination addr. 7437 */ 7438 if (udp->udp_label_len_v6 > 0) { 7439 ignore &= ~IPPF_HOPOPTS; 7440 ipp->ipp_fields &= ~IPPF_HOPOPTS; 7441 } 7442 7443 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 7444 /* No sticky options nor ancillary data. */ 7445 mutex_exit(&connp->conn_lock); 7446 goto no_options; 7447 } 7448 7449 /* 7450 * Go through the options figuring out where each is going to 7451 * come from and build two masks. The first mask indicates if 7452 * the option exists at all. The second mask indicates if the 7453 * option is sticky or ancillary. 
7454 */ 7455 if (!(ignore & IPPF_HOPOPTS)) { 7456 if (ipp->ipp_fields & IPPF_HOPOPTS) { 7457 option_exists |= IPPF_HOPOPTS; 7458 udp_ip_hdr_len += ipp->ipp_hopoptslen; 7459 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 7460 option_exists |= IPPF_HOPOPTS; 7461 is_sticky |= IPPF_HOPOPTS; 7462 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 7463 hopoptsptr = kmem_alloc( 7464 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 7465 if (hopoptsptr == NULL) { 7466 *error = ENOMEM; 7467 mutex_exit(&connp->conn_lock); 7468 goto done; 7469 } 7470 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 7471 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 7472 hopoptslen); 7473 udp_ip_hdr_len += hopoptslen; 7474 } 7475 } 7476 mutex_exit(&connp->conn_lock); 7477 7478 if (!(ignore & IPPF_RTHDR)) { 7479 if (ipp->ipp_fields & IPPF_RTHDR) { 7480 option_exists |= IPPF_RTHDR; 7481 udp_ip_hdr_len += ipp->ipp_rthdrlen; 7482 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 7483 option_exists |= IPPF_RTHDR; 7484 is_sticky |= IPPF_RTHDR; 7485 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 7486 } 7487 } 7488 7489 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 7490 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 7491 option_exists |= IPPF_RTDSTOPTS; 7492 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 7493 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 7494 option_exists |= IPPF_RTDSTOPTS; 7495 is_sticky |= IPPF_RTDSTOPTS; 7496 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 7497 } 7498 } 7499 7500 if (!(ignore & IPPF_DSTOPTS)) { 7501 if (ipp->ipp_fields & IPPF_DSTOPTS) { 7502 option_exists |= IPPF_DSTOPTS; 7503 udp_ip_hdr_len += ipp->ipp_dstoptslen; 7504 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 7505 option_exists |= IPPF_DSTOPTS; 7506 is_sticky |= IPPF_DSTOPTS; 7507 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 7508 } 7509 } 7510 7511 if (!(ignore & IPPF_IFINDEX)) { 7512 if (ipp->ipp_fields & IPPF_IFINDEX) { 7513 
option_exists |= IPPF_IFINDEX; 7514 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 7515 option_exists |= IPPF_IFINDEX; 7516 is_sticky |= IPPF_IFINDEX; 7517 } 7518 } 7519 7520 if (!(ignore & IPPF_ADDR)) { 7521 if (ipp->ipp_fields & IPPF_ADDR) { 7522 option_exists |= IPPF_ADDR; 7523 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 7524 option_exists |= IPPF_ADDR; 7525 is_sticky |= IPPF_ADDR; 7526 } 7527 } 7528 7529 if (!(ignore & IPPF_DONTFRAG)) { 7530 if (ipp->ipp_fields & IPPF_DONTFRAG) { 7531 option_exists |= IPPF_DONTFRAG; 7532 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 7533 option_exists |= IPPF_DONTFRAG; 7534 is_sticky |= IPPF_DONTFRAG; 7535 } 7536 } 7537 7538 if (!(ignore & IPPF_USE_MIN_MTU)) { 7539 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 7540 option_exists |= IPPF_USE_MIN_MTU; 7541 } else if (udp->udp_sticky_ipp.ipp_fields & 7542 IPPF_USE_MIN_MTU) { 7543 option_exists |= IPPF_USE_MIN_MTU; 7544 is_sticky |= IPPF_USE_MIN_MTU; 7545 } 7546 } 7547 7548 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 7549 option_exists |= IPPF_HOPLIMIT; 7550 /* IPV6_HOPLIMIT can never be sticky */ 7551 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 7552 7553 if (!(ignore & IPPF_UNICAST_HOPS) && 7554 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 7555 option_exists |= IPPF_UNICAST_HOPS; 7556 is_sticky |= IPPF_UNICAST_HOPS; 7557 } 7558 7559 if (!(ignore & IPPF_MULTICAST_HOPS) && 7560 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 7561 option_exists |= IPPF_MULTICAST_HOPS; 7562 is_sticky |= IPPF_MULTICAST_HOPS; 7563 } 7564 7565 if (!(ignore & IPPF_TCLASS)) { 7566 if (ipp->ipp_fields & IPPF_TCLASS) { 7567 option_exists |= IPPF_TCLASS; 7568 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 7569 option_exists |= IPPF_TCLASS; 7570 is_sticky |= IPPF_TCLASS; 7571 } 7572 } 7573 7574 if (!(ignore & IPPF_NEXTHOP) && 7575 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 7576 option_exists |= 
IPPF_NEXTHOP; 7577 is_sticky |= IPPF_NEXTHOP; 7578 } 7579 7580 no_options: 7581 7582 /* 7583 * If any options carried in the ip6i_t were specified, we 7584 * need to account for the ip6i_t in the data we'll be sending 7585 * down. 7586 */ 7587 if (option_exists & IPPF_HAS_IP6I) 7588 udp_ip_hdr_len += sizeof (ip6i_t); 7589 7590 /* check/fix buffer config, setup pointers into it */ 7591 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 7592 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 7593 !OK_32PTR(ip6h)) { 7594 /* Try to get everything in a single mblk next time */ 7595 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 7596 udp->udp_max_hdr_len = udp_ip_hdr_len; 7597 (void) mi_set_sth_wroff(UDP_RD(q), 7598 udp->udp_max_hdr_len + us->us_wroff_extra); 7599 } 7600 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 7601 if (mp2 == NULL) { 7602 *error = ENOMEM; 7603 goto done; 7604 } 7605 mp2->b_wptr = DB_LIM(mp2); 7606 mp2->b_cont = mp1; 7607 mp1 = mp2; 7608 if (DB_TYPE(mp) != M_DATA) 7609 mp->b_cont = mp1; 7610 else 7611 mp = mp1; 7612 7613 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 7614 } 7615 mp1->b_rptr = (unsigned char *)ip6h; 7616 ip6i = (ip6i_t *)ip6h; 7617 7618 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 7619 if (option_exists & IPPF_HAS_IP6I) { 7620 ip6h = (ip6_t *)&ip6i[1]; 7621 ip6i->ip6i_flags = 0; 7622 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7623 7624 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 7625 if (option_exists & IPPF_SCOPE_ID) { 7626 ip6i->ip6i_flags |= IP6I_IFINDEX; 7627 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 7628 } else if (option_exists & IPPF_IFINDEX) { 7629 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 7630 ASSERT(tipp->ipp_ifindex != 0); 7631 ip6i->ip6i_flags |= IP6I_IFINDEX; 7632 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 7633 } 7634 7635 if (option_exists & IPPF_ADDR) { 7636 /* 7637 * Enable per-packet source address verification if 7638 * IPV6_PKTINFO specified the source address. 7639 * ip6_src is set in the transport's _wput function. 7640 */ 7641 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 7642 } 7643 7644 if (option_exists & IPPF_DONTFRAG) { 7645 ip6i->ip6i_flags |= IP6I_DONTFRAG; 7646 } 7647 7648 if (option_exists & IPPF_USE_MIN_MTU) { 7649 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 7650 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 7651 } 7652 7653 if (option_exists & IPPF_NEXTHOP) { 7654 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 7655 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 7656 ip6i->ip6i_flags |= IP6I_NEXTHOP; 7657 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 7658 } 7659 7660 /* 7661 * tell IP this is an ip6i_t private header 7662 */ 7663 ip6i->ip6i_nxt = IPPROTO_RAW; 7664 } 7665 7666 /* Initialize IPv6 header */ 7667 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7668 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7669 7670 /* Set the hoplimit of the outgoing packet. */ 7671 if (option_exists & IPPF_HOPLIMIT) { 7672 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 7673 ip6h->ip6_hops = ipp->ipp_hoplimit; 7674 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7675 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7676 ip6h->ip6_hops = udp->udp_multicast_ttl; 7677 if (option_exists & IPPF_MULTICAST_HOPS) 7678 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7679 } else { 7680 ip6h->ip6_hops = udp->udp_ttl; 7681 if (option_exists & IPPF_UNICAST_HOPS) 7682 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7683 } 7684 7685 if (option_exists & IPPF_ADDR) { 7686 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7687 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7688 ip6h->ip6_src = tipp->ipp_addr; 7689 } else { 7690 /* 7691 * The source address was not set using IPV6_PKTINFO. 7692 * First look at the bound source. 7693 * If unspecified fallback to __sin6_src_id. 7694 */ 7695 ip6h->ip6_src = udp->udp_v6src; 7696 if (sin6->__sin6_src_id != 0 && 7697 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7698 ip_srcid_find_id(sin6->__sin6_src_id, 7699 &ip6h->ip6_src, connp->conn_zoneid, 7700 us->us_netstack); 7701 } 7702 } 7703 7704 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7705 cp = (uint8_t *)&ip6h[1]; 7706 7707 /* 7708 * Here's where we have to start stringing together 7709 * any extension headers in the right order: 7710 * Hop-by-hop, destination, routing, and final destination opts. 
7711 */ 7712 if (option_exists & IPPF_HOPOPTS) { 7713 /* Hop-by-hop options */ 7714 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7715 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7716 if (hopoptslen == 0) { 7717 hopoptsptr = tipp->ipp_hopopts; 7718 hopoptslen = tipp->ipp_hopoptslen; 7719 is_ancillary = B_TRUE; 7720 } 7721 7722 *nxthdr_ptr = IPPROTO_HOPOPTS; 7723 nxthdr_ptr = &hbh->ip6h_nxt; 7724 7725 bcopy(hopoptsptr, cp, hopoptslen); 7726 cp += hopoptslen; 7727 7728 if (hopoptsptr != NULL && !is_ancillary) { 7729 kmem_free(hopoptsptr, hopoptslen); 7730 hopoptsptr = NULL; 7731 hopoptslen = 0; 7732 } 7733 } 7734 /* 7735 * En-route destination options 7736 * Only do them if there's a routing header as well 7737 */ 7738 if (option_exists & IPPF_RTDSTOPTS) { 7739 ip6_dest_t *dst = (ip6_dest_t *)cp; 7740 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7741 7742 *nxthdr_ptr = IPPROTO_DSTOPTS; 7743 nxthdr_ptr = &dst->ip6d_nxt; 7744 7745 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7746 cp += tipp->ipp_rtdstoptslen; 7747 } 7748 /* 7749 * Routing header next 7750 */ 7751 if (option_exists & IPPF_RTHDR) { 7752 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7753 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7754 7755 *nxthdr_ptr = IPPROTO_ROUTING; 7756 nxthdr_ptr = &rt->ip6r_nxt; 7757 7758 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7759 cp += tipp->ipp_rthdrlen; 7760 } 7761 /* 7762 * Do ultimate destination options 7763 */ 7764 if (option_exists & IPPF_DSTOPTS) { 7765 ip6_dest_t *dest = (ip6_dest_t *)cp; 7766 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7767 7768 *nxthdr_ptr = IPPROTO_DSTOPTS; 7769 nxthdr_ptr = &dest->ip6d_nxt; 7770 7771 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7772 cp += tipp->ipp_dstoptslen; 7773 } 7774 /* 7775 * Now set the last header pointer to the proto passed in 7776 */ 7777 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7778 *nxthdr_ptr = IPPROTO_UDP; 7779 7780 /* Update UDP header */ 7781 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 7782 udph->uha_dst_port = sin6->sin6_port; 7783 udph->uha_src_port = udp->udp_port; 7784 7785 /* 7786 * Copy in the destination address 7787 */ 7788 ip6h->ip6_dst = ip6_dst; 7789 7790 ip6h->ip6_vcf = 7791 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 7792 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7793 7794 if (option_exists & IPPF_TCLASS) { 7795 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7796 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7797 tipp->ipp_tclass); 7798 } 7799 7800 if (option_exists & IPPF_RTHDR) { 7801 ip6_rthdr_t *rth; 7802 7803 /* 7804 * Perform any processing needed for source routing. 7805 * We know that all extension headers will be in the same mblk 7806 * as the IPv6 header. 7807 */ 7808 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7809 if (rth != NULL && rth->ip6r_segleft != 0) { 7810 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7811 /* 7812 * Drop packet - only support Type 0 routing. 7813 * Notify the application as well. 7814 */ 7815 *error = EPROTO; 7816 goto done; 7817 } 7818 7819 /* 7820 * rth->ip6r_len is twice the number of 7821 * addresses in the header. Thus it must be even. 7822 */ 7823 if (rth->ip6r_len & 0x1) { 7824 *error = EPROTO; 7825 goto done; 7826 } 7827 /* 7828 * Shuffle the routing header and ip6_dst 7829 * addresses, and get the checksum difference 7830 * between the first hop (in ip6_dst) and 7831 * the destination (in the last routing hdr entry). 7832 */ 7833 csum = ip_massage_options_v6(ip6h, rth, 7834 us->us_netstack); 7835 /* 7836 * Verify that the first hop isn't a mapped address. 7837 * Routers along the path need to do this verification 7838 * for subsequent hops. 
7839 */ 7840 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7841 *error = EADDRNOTAVAIL; 7842 goto done; 7843 } 7844 7845 cp += (rth->ip6r_len + 1)*8; 7846 } 7847 } 7848 7849 /* count up length of UDP packet */ 7850 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7851 if ((mp2 = mp1->b_cont) != NULL) { 7852 do { 7853 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7854 ip_len += (uint32_t)MBLKL(mp2); 7855 } while ((mp2 = mp2->b_cont) != NULL); 7856 } 7857 7858 /* 7859 * If the size of the packet is greater than the maximum allowed by 7860 * ip, return an error. Passing this down could cause panics because 7861 * the size will have wrapped and be inconsistent with the msg size. 7862 */ 7863 if (ip_len > IP_MAXPACKET) { 7864 *error = EMSGSIZE; 7865 goto done; 7866 } 7867 7868 /* Store the UDP length. Subtract length of extension hdrs */ 7869 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7870 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7871 7872 /* 7873 * We make it easy for IP to include our pseudo header 7874 * by putting our length in uh_checksum, modified (if 7875 * we have a routing header) by the checksum difference 7876 * between the ultimate destination and first hop addresses. 7877 * Note: UDP over IPv6 must always checksum the packet. 7878 */ 7879 csum += udph->uha_length; 7880 csum = (csum & 0xFFFF) + (csum >> 16); 7881 udph->uha_checksum = (uint16_t)csum; 7882 7883 #ifdef _LITTLE_ENDIAN 7884 ip_len = htons(ip_len); 7885 #endif 7886 ip6h->ip6_plen = ip_len; 7887 if (DB_CRED(mp) != NULL) 7888 mblk_setcred(mp1, DB_CRED(mp)); 7889 7890 if (DB_TYPE(mp) != M_DATA) { 7891 ASSERT(mp != mp1); 7892 freeb(mp); 7893 } 7894 7895 /* mp has been consumed and we'll return success */ 7896 ASSERT(*error == 0); 7897 mp = NULL; 7898 7899 /* We're done. 
Pass the packet to IP */ 7900 BUMP_MIB(&udp->udp_mib, udpHCOutDatagrams); 7901 ip_output_v6(connp, mp1, q, IP_WPUT); 7902 7903 done: 7904 if (hopoptsptr != NULL && !is_ancillary) { 7905 kmem_free(hopoptsptr, hopoptslen); 7906 hopoptsptr = NULL; 7907 } 7908 if (*error != 0) { 7909 ASSERT(mp != NULL); 7910 BUMP_MIB(&udp->udp_mib, udpOutErrors); 7911 } 7912 return (mp); 7913 } 7914 7915 static void 7916 udp_wput_other(queue_t *q, mblk_t *mp) 7917 { 7918 uchar_t *rptr = mp->b_rptr; 7919 struct datab *db; 7920 struct iocblk *iocp; 7921 cred_t *cr; 7922 conn_t *connp = Q_TO_CONN(q); 7923 udp_t *udp = connp->conn_udp; 7924 udp_stack_t *us; 7925 7926 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7927 "udp_wput_other_start: q %p", q); 7928 7929 us = udp->udp_us; 7930 db = mp->b_datap; 7931 7932 cr = DB_CREDDEF(mp, connp->conn_cred); 7933 7934 switch (db->db_type) { 7935 case M_PROTO: 7936 case M_PCPROTO: 7937 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7938 freemsg(mp); 7939 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7940 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 7941 return; 7942 } 7943 switch (((t_primp_t)rptr)->type) { 7944 case T_ADDR_REQ: 7945 udp_addr_req(q, mp); 7946 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7947 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7948 return; 7949 case O_T_BIND_REQ: 7950 case T_BIND_REQ: 7951 udp_bind(q, mp); 7952 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7953 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7954 return; 7955 case T_CONN_REQ: 7956 udp_connect(q, mp); 7957 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7958 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7959 return; 7960 case T_CAPABILITY_REQ: 7961 udp_capability_req(q, mp); 7962 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7963 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7964 return; 7965 case T_INFO_REQ: 7966 udp_info_req(q, mp); 7967 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7968 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7969 return; 7970 case 
T_UNITDATA_REQ: 7971 /* 7972 * If a T_UNITDATA_REQ gets here, the address must 7973 * be bad. Valid T_UNITDATA_REQs are handled 7974 * in udp_wput. 7975 */ 7976 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7977 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7978 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 7979 return; 7980 case T_UNBIND_REQ: 7981 udp_unbind(q, mp); 7982 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7983 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7984 return; 7985 case T_SVR4_OPTMGMT_REQ: 7986 if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr)) 7987 /* 7988 * Use upper queue for option processing in 7989 * case the request is not handled at this 7990 * level and needs to be passed down to IP. 7991 */ 7992 (void) svr4_optcom_req(_WR(UDP_RD(q)), 7993 mp, cr, &udp_opt_obj); 7994 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7995 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7996 return; 7997 7998 case T_OPTMGMT_REQ: 7999 /* 8000 * Use upper queue for option processing in 8001 * case the request is not handled at this 8002 * level and needs to be passed down to IP. 8003 */ 8004 (void) tpi_optcom_req(_WR(UDP_RD(q)), 8005 mp, cr, &udp_opt_obj); 8006 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 8007 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 8008 return; 8009 8010 case T_DISCON_REQ: 8011 udp_disconnect(q, mp); 8012 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 8013 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 8014 return; 8015 8016 /* The following TPI message is not supported by udp. */ 8017 case O_T_CONN_RES: 8018 case T_CONN_RES: 8019 udp_err_ack(q, mp, TNOTSUPPORT, 0); 8020 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 8021 "udp_wput_other_end: q %p (%S)", q, 8022 "connres/disconreq"); 8023 return; 8024 8025 /* The following 3 TPI messages are illegal for udp. 
*/ 8026 case T_DATA_REQ: 8027 case T_EXDATA_REQ: 8028 case T_ORDREL_REQ: 8029 udp_err_ack(q, mp, TNOTSUPPORT, 0); 8030 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 8031 "udp_wput_other_end: q %p (%S)", q, 8032 "data/exdata/ordrel"); 8033 return; 8034 default: 8035 break; 8036 } 8037 break; 8038 case M_FLUSH: 8039 if (*rptr & FLUSHW) 8040 flushq(q, FLUSHDATA); 8041 break; 8042 case M_IOCTL: 8043 iocp = (struct iocblk *)mp->b_rptr; 8044 switch (iocp->ioc_cmd) { 8045 case TI_GETPEERNAME: 8046 if (udp->udp_state != TS_DATA_XFER) { 8047 /* 8048 * If a default destination address has not 8049 * been associated with the stream, then we 8050 * don't know the peer's name. 8051 */ 8052 iocp->ioc_error = ENOTCONN; 8053 iocp->ioc_count = 0; 8054 mp->b_datap->db_type = M_IOCACK; 8055 putnext(UDP_RD(q), mp); 8056 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 8057 "udp_wput_other_end: q %p (%S)", q, 8058 "getpeername"); 8059 return; 8060 } 8061 /* FALLTHRU */ 8062 case TI_GETMYNAME: { 8063 /* 8064 * For TI_GETPEERNAME and TI_GETMYNAME, we first 8065 * need to copyin the user's strbuf structure. 8066 * Processing will continue in the M_IOCDATA case 8067 * below. 8068 */ 8069 mi_copyin(q, mp, NULL, 8070 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 8071 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 8072 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 8073 return; 8074 } 8075 case ND_SET: 8076 /* nd_getset performs the necessary checking */ 8077 case ND_GET: 8078 if (nd_getset(q, us->us_nd, mp)) { 8079 putnext(UDP_RD(q), mp); 8080 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 8081 "udp_wput_other_end: q %p (%S)", q, "get"); 8082 return; 8083 } 8084 break; 8085 case _SIOCSOCKFALLBACK: 8086 /* 8087 * Either sockmod is about to be popped and the 8088 * socket would now be treated as a plain stream, 8089 * or a module is about to be pushed so we could 8090 * no longer use read-side synchronous stream. 8091 * Drain any queued data and disable direct sockfs 8092 * interface from now on. 
8093 */ 8094 if (!udp->udp_issocket) { 8095 DB_TYPE(mp) = M_IOCNAK; 8096 iocp->ioc_error = EINVAL; 8097 } else { 8098 udp->udp_issocket = B_FALSE; 8099 if (udp->udp_direct_sockfs) { 8100 /* 8101 * Disable read-side synchronous 8102 * stream interface and drain any 8103 * queued data. 8104 */ 8105 udp_rcv_drain(UDP_RD(q), udp, 8106 B_FALSE); 8107 ASSERT(!udp->udp_direct_sockfs); 8108 UDP_STAT(us, udp_sock_fallback); 8109 } 8110 DB_TYPE(mp) = M_IOCACK; 8111 iocp->ioc_error = 0; 8112 } 8113 iocp->ioc_count = 0; 8114 iocp->ioc_rval = 0; 8115 putnext(UDP_RD(q), mp); 8116 return; 8117 default: 8118 break; 8119 } 8120 break; 8121 case M_IOCDATA: 8122 udp_wput_iocdata(q, mp); 8123 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 8124 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 8125 return; 8126 default: 8127 /* Unrecognized messages are passed through without change. */ 8128 break; 8129 } 8130 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 8131 "udp_wput_other_end: q %p (%S)", q, "end"); 8132 ip_output(connp, mp, q, IP_WPUT); 8133 } 8134 8135 /* ARGSUSED */ 8136 static void 8137 udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 8138 { 8139 udp_wput_other(((conn_t *)arg)->conn_wq, mp); 8140 udp_exit((conn_t *)arg); 8141 } 8142 8143 /* 8144 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 8145 * messages. 8146 */ 8147 static void 8148 udp_wput_iocdata(queue_t *q, mblk_t *mp) 8149 { 8150 mblk_t *mp1; 8151 STRUCT_HANDLE(strbuf, sb); 8152 uint16_t port; 8153 in6_addr_t v6addr; 8154 ipaddr_t v4addr; 8155 uint32_t flowinfo = 0; 8156 int addrlen; 8157 udp_t *udp = Q_TO_UDP(q); 8158 8159 /* Make sure it is one of ours. 
*/ 8160 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 8161 case TI_GETMYNAME: 8162 case TI_GETPEERNAME: 8163 break; 8164 default: 8165 ip_output(udp->udp_connp, mp, q, IP_WPUT); 8166 return; 8167 } 8168 8169 q = WR(UDP_RD(q)); 8170 switch (mi_copy_state(q, mp, &mp1)) { 8171 case -1: 8172 return; 8173 case MI_COPY_CASE(MI_COPY_IN, 1): 8174 break; 8175 case MI_COPY_CASE(MI_COPY_OUT, 1): 8176 /* 8177 * The address has been copied out, so now 8178 * copyout the strbuf. 8179 */ 8180 mi_copyout(q, mp); 8181 return; 8182 case MI_COPY_CASE(MI_COPY_OUT, 2): 8183 /* 8184 * The address and strbuf have been copied out. 8185 * We're done, so just acknowledge the original 8186 * M_IOCTL. 8187 */ 8188 mi_copy_done(q, mp, 0); 8189 return; 8190 default: 8191 /* 8192 * Something strange has happened, so acknowledge 8193 * the original M_IOCTL with an EPROTO error. 8194 */ 8195 mi_copy_done(q, mp, EPROTO); 8196 return; 8197 } 8198 8199 /* 8200 * Now we have the strbuf structure for TI_GETMYNAME 8201 * and TI_GETPEERNAME. Next we copyout the requested 8202 * address and then we'll copyout the strbuf. 8203 */ 8204 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 8205 (void *)mp1->b_rptr); 8206 if (udp->udp_family == AF_INET) 8207 addrlen = sizeof (sin_t); 8208 else 8209 addrlen = sizeof (sin6_t); 8210 8211 if (STRUCT_FGET(sb, maxlen) < addrlen) { 8212 mi_copy_done(q, mp, EINVAL); 8213 return; 8214 } 8215 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 8216 case TI_GETMYNAME: 8217 if (udp->udp_family == AF_INET) { 8218 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8219 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 8220 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 8221 v4addr = V4_PART_OF_V6(udp->udp_v6src); 8222 } else { 8223 /* 8224 * INADDR_ANY 8225 * udp_v6src is not set, we might be bound to 8226 * broadcast/multicast. 
Use udp_bound_v6src as 8227 * local address instead (that could 8228 * also still be INADDR_ANY) 8229 */ 8230 v4addr = V4_PART_OF_V6(udp->udp_bound_v6src); 8231 } 8232 } else { 8233 /* udp->udp_family == AF_INET6 */ 8234 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 8235 v6addr = udp->udp_v6src; 8236 } else { 8237 /* 8238 * UNSPECIFIED 8239 * udp_v6src is not set, we might be bound to 8240 * broadcast/multicast. Use udp_bound_v6src as 8241 * local address instead (that could 8242 * also still be UNSPECIFIED) 8243 */ 8244 v6addr = udp->udp_bound_v6src; 8245 } 8246 } 8247 port = udp->udp_port; 8248 break; 8249 case TI_GETPEERNAME: 8250 if (udp->udp_state != TS_DATA_XFER) { 8251 mi_copy_done(q, mp, ENOTCONN); 8252 return; 8253 } 8254 if (udp->udp_family == AF_INET) { 8255 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8256 v4addr = V4_PART_OF_V6(udp->udp_v6dst); 8257 } else { 8258 /* udp->udp_family == AF_INET6) */ 8259 v6addr = udp->udp_v6dst; 8260 flowinfo = udp->udp_flowinfo; 8261 } 8262 port = udp->udp_dstport; 8263 break; 8264 default: 8265 mi_copy_done(q, mp, EPROTO); 8266 return; 8267 } 8268 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 8269 if (!mp1) 8270 return; 8271 8272 if (udp->udp_family == AF_INET) { 8273 sin_t *sin; 8274 8275 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 8276 sin = (sin_t *)mp1->b_rptr; 8277 mp1->b_wptr = (uchar_t *)&sin[1]; 8278 *sin = sin_null; 8279 sin->sin_family = AF_INET; 8280 sin->sin_addr.s_addr = v4addr; 8281 sin->sin_port = port; 8282 } else { 8283 /* udp->udp_family == AF_INET6 */ 8284 sin6_t *sin6; 8285 8286 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 8287 sin6 = (sin6_t *)mp1->b_rptr; 8288 mp1->b_wptr = (uchar_t *)&sin6[1]; 8289 *sin6 = sin6_null; 8290 sin6->sin6_family = AF_INET6; 8291 sin6->sin6_flowinfo = flowinfo; 8292 sin6->sin6_addr = v6addr; 8293 sin6->sin6_port = port; 8294 } 8295 /* Copy out the address */ 8296 mi_copyout(q, mp); 8297 } 8298 8299 8300 static int 8301 
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 8302 udpattrs_t *udpattrs) 8303 { 8304 struct T_unitdata_req *udreqp; 8305 int is_absreq_failure; 8306 cred_t *cr; 8307 conn_t *connp = Q_TO_CONN(q); 8308 8309 ASSERT(((t_primp_t)mp->b_rptr)->type); 8310 8311 cr = DB_CREDDEF(mp, connp->conn_cred); 8312 8313 udreqp = (struct T_unitdata_req *)mp->b_rptr; 8314 8315 /* 8316 * Use upper queue for option processing since the callback 8317 * routines expect to be called in UDP instance instead of IP. 8318 */ 8319 *errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length, 8320 udreqp->OPT_offset, cr, &udp_opt_obj, 8321 udpattrs, &is_absreq_failure); 8322 8323 if (*errorp != 0) { 8324 /* 8325 * Note: No special action needed in this 8326 * module for "is_absreq_failure" 8327 */ 8328 return (-1); /* failure */ 8329 } 8330 ASSERT(is_absreq_failure == 0); 8331 return (0); /* success */ 8332 } 8333 8334 void 8335 udp_ddi_init(void) 8336 { 8337 UDP6_MAJ = ddi_name_to_major(UDP6); 8338 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 8339 udp_opt_obj.odb_opt_arr_cnt); 8340 8341 udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), 8342 CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); 8343 8344 /* 8345 * We want to be informed each time a stack is created or 8346 * destroyed in the kernel, so we can maintain the 8347 * set of udp_stack_t's. 8348 */ 8349 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 8350 } 8351 8352 void 8353 udp_ddi_destroy(void) 8354 { 8355 netstack_unregister(NS_UDP); 8356 8357 kmem_cache_destroy(udp_cache); 8358 } 8359 8360 /* 8361 * Initialize the UDP stack instance. 
8362 */ 8363 static void * 8364 udp_stack_init(netstackid_t stackid, netstack_t *ns) 8365 { 8366 udp_stack_t *us; 8367 udpparam_t *pa; 8368 int i; 8369 8370 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 8371 us->us_netstack = ns; 8372 8373 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 8374 us->us_epriv_ports[0] = 2049; 8375 us->us_epriv_ports[1] = 4045; 8376 8377 /* 8378 * The smallest anonymous port in the priviledged port range which UDP 8379 * looks for free port. Use in the option UDP_ANONPRIVBIND. 8380 */ 8381 us->us_min_anonpriv_port = 512; 8382 8383 us->us_bind_fanout_size = udp_bind_fanout_size; 8384 8385 /* Roundup variable that might have been modified in /etc/system */ 8386 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 8387 /* Not a power of two. Round up to nearest power of two */ 8388 for (i = 0; i < 31; i++) { 8389 if (us->us_bind_fanout_size < (1 << i)) 8390 break; 8391 } 8392 us->us_bind_fanout_size = 1 << i; 8393 } 8394 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 8395 sizeof (udp_fanout_t), KM_SLEEP); 8396 for (i = 0; i < us->us_bind_fanout_size; i++) { 8397 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 8398 NULL); 8399 } 8400 8401 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 8402 8403 us->us_param_arr = pa; 8404 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 8405 8406 (void) udp_param_register(&us->us_nd, 8407 us->us_param_arr, A_CNT(udp_param_arr)); 8408 8409 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 8410 us->us_mibkp = udp_kstat_init(stackid); 8411 return (us); 8412 } 8413 8414 /* 8415 * Free the UDP stack instance. 
8416 */ 8417 static void 8418 udp_stack_fini(netstackid_t stackid, void *arg) 8419 { 8420 udp_stack_t *us = (udp_stack_t *)arg; 8421 int i; 8422 8423 for (i = 0; i < us->us_bind_fanout_size; i++) { 8424 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 8425 } 8426 8427 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 8428 sizeof (udp_fanout_t)); 8429 8430 us->us_bind_fanout = NULL; 8431 8432 nd_free(&us->us_nd); 8433 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 8434 us->us_param_arr = NULL; 8435 8436 udp_kstat_fini(stackid, us->us_mibkp); 8437 us->us_mibkp = NULL; 8438 8439 udp_kstat2_fini(stackid, us->us_kstat); 8440 us->us_kstat = NULL; 8441 bzero(&us->us_statistics, sizeof (us->us_statistics)); 8442 kmem_free(us, sizeof (*us)); 8443 } 8444 8445 static void * 8446 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 8447 { 8448 kstat_t *ksp; 8449 8450 udp_stat_t template = { 8451 { "udp_ip_send", KSTAT_DATA_UINT64 }, 8452 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 8453 { "udp_ire_null", KSTAT_DATA_UINT64 }, 8454 { "udp_drain", KSTAT_DATA_UINT64 }, 8455 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 8456 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 8457 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 8458 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 8459 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 8460 { "udp_out_opt", KSTAT_DATA_UINT64 }, 8461 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 8462 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 8463 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 8464 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 8465 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 8466 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 8467 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 8468 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 8469 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 8470 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 8471 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 8472 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 8473 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 
}, 8474 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 8475 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 8476 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 8477 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 8478 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 8479 #ifdef DEBUG 8480 { "udp_data_conn", KSTAT_DATA_UINT64 }, 8481 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 8482 #endif 8483 }; 8484 8485 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 8486 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 8487 KSTAT_FLAG_VIRTUAL, stackid); 8488 8489 if (ksp == NULL) 8490 return (NULL); 8491 8492 bcopy(&template, us_statisticsp, sizeof (template)); 8493 ksp->ks_data = (void *)us_statisticsp; 8494 ksp->ks_private = (void *)(uintptr_t)stackid; 8495 8496 kstat_install(ksp); 8497 return (ksp); 8498 } 8499 8500 static void 8501 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 8502 { 8503 if (ksp != NULL) { 8504 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 8505 kstat_delete_netstack(ksp, stackid); 8506 } 8507 } 8508 8509 static void * 8510 udp_kstat_init(netstackid_t stackid) 8511 { 8512 kstat_t *ksp; 8513 8514 udp_named_kstat_t template = { 8515 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 8516 { "inErrors", KSTAT_DATA_UINT32, 0 }, 8517 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 8518 { "entrySize", KSTAT_DATA_INT32, 0 }, 8519 { "entry6Size", KSTAT_DATA_INT32, 0 }, 8520 { "outErrors", KSTAT_DATA_UINT32, 0 }, 8521 }; 8522 8523 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 8524 KSTAT_TYPE_NAMED, 8525 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 8526 8527 if (ksp == NULL || ksp->ks_data == NULL) 8528 return (NULL); 8529 8530 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 8531 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 8532 8533 bcopy(&template, ksp->ks_data, sizeof (template)); 8534 ksp->ks_update = udp_kstat_update; 8535 ksp->ks_private = (void *)(uintptr_t)stackid; 8536 8537 kstat_install(ksp); 8538 
return (ksp); 8539 } 8540 8541 static void 8542 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 8543 { 8544 if (ksp != NULL) { 8545 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 8546 kstat_delete_netstack(ksp, stackid); 8547 } 8548 } 8549 8550 static int 8551 udp_kstat_update(kstat_t *kp, int rw) 8552 { 8553 udp_named_kstat_t *udpkp; 8554 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 8555 netstack_t *ns; 8556 udp_stack_t *us; 8557 8558 if ((kp == NULL) || (kp->ks_data == NULL)) 8559 return (EIO); 8560 8561 if (rw == KSTAT_WRITE) 8562 return (EACCES); 8563 8564 ns = netstack_find_by_stackid(stackid); 8565 if (ns == NULL) 8566 return (-1); 8567 us = ns->netstack_udp; 8568 if (us == NULL) { 8569 netstack_rele(ns); 8570 return (-1); 8571 } 8572 udpkp = (udp_named_kstat_t *)kp->ks_data; 8573 8574 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 8575 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 8576 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 8577 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 8578 netstack_rele(ns); 8579 return (0); 8580 } 8581 8582 /* ARGSUSED */ 8583 static void 8584 udp_rput(queue_t *q, mblk_t *mp) 8585 { 8586 /* 8587 * We get here whenever we do qreply() from IP, 8588 * i.e as part of handlings ioctls, etc. 8589 */ 8590 putnext(q, mp); 8591 } 8592 8593 /* 8594 * Read-side synchronous stream info entry point, called as a 8595 * result of handling certain STREAMS ioctl operations. 
8596 */ 8597 static int 8598 udp_rinfop(queue_t *q, infod_t *dp) 8599 { 8600 mblk_t *mp; 8601 uint_t cmd = dp->d_cmd; 8602 int res = 0; 8603 int error = 0; 8604 udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); 8605 struct stdata *stp = STREAM(q); 8606 8607 mutex_enter(&udp->udp_drain_lock); 8608 /* If shutdown on read has happened, return nothing */ 8609 mutex_enter(&stp->sd_lock); 8610 if (stp->sd_flag & STREOF) { 8611 mutex_exit(&stp->sd_lock); 8612 goto done; 8613 } 8614 mutex_exit(&stp->sd_lock); 8615 8616 if ((mp = udp->udp_rcv_list_head) == NULL) 8617 goto done; 8618 8619 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 8620 8621 if (cmd & INFOD_COUNT) { 8622 /* 8623 * Return the number of messages. 8624 */ 8625 dp->d_count += udp->udp_rcv_msgcnt; 8626 res |= INFOD_COUNT; 8627 } 8628 if (cmd & INFOD_BYTES) { 8629 /* 8630 * Return size of all data messages. 8631 */ 8632 dp->d_bytes += udp->udp_rcv_cnt; 8633 res |= INFOD_BYTES; 8634 } 8635 if (cmd & INFOD_FIRSTBYTES) { 8636 /* 8637 * Return size of first data message. 8638 */ 8639 dp->d_bytes = msgdsize(mp); 8640 res |= INFOD_FIRSTBYTES; 8641 dp->d_cmd &= ~INFOD_FIRSTBYTES; 8642 } 8643 if (cmd & INFOD_COPYOUT) { 8644 mblk_t *mp1 = mp->b_cont; 8645 int n; 8646 /* 8647 * Return data contents of first message. 8648 */ 8649 ASSERT(DB_TYPE(mp1) == M_DATA); 8650 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 8651 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 8652 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 8653 UIO_READ, dp->d_uiop)) != 0) { 8654 goto done; 8655 } 8656 mp1 = mp1->b_cont; 8657 } 8658 res |= INFOD_COPYOUT; 8659 dp->d_cmd &= ~INFOD_COPYOUT; 8660 } 8661 done: 8662 mutex_exit(&udp->udp_drain_lock); 8663 8664 dp->d_res |= res; 8665 8666 return (error); 8667 } 8668 8669 /* 8670 * Read-side synchronous stream entry point. This is called as a result 8671 * of recv/read operation done at sockfs, and is guaranteed to execute 8672 * outside of the interrupt thread context. 
It returns a single datagram 8673 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 8674 */ 8675 static int 8676 udp_rrw(queue_t *q, struiod_t *dp) 8677 { 8678 mblk_t *mp; 8679 udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); 8680 udp_stack_t *us = udp->udp_us; 8681 8682 /* We should never get here when we're in SNMP mode */ 8683 ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); 8684 8685 /* 8686 * Dequeue datagram from the head of the list and return 8687 * it to caller; also ensure that RSLEEP sd_wakeq flag is 8688 * set/cleared depending on whether or not there's data 8689 * remaining in the list. 8690 */ 8691 mutex_enter(&udp->udp_drain_lock); 8692 if (!udp->udp_direct_sockfs) { 8693 mutex_exit(&udp->udp_drain_lock); 8694 UDP_STAT(us, udp_rrw_busy); 8695 return (EBUSY); 8696 } 8697 if ((mp = udp->udp_rcv_list_head) != NULL) { 8698 uint_t size = msgdsize(mp); 8699 8700 /* Last datagram in the list? */ 8701 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 8702 udp->udp_rcv_list_tail = NULL; 8703 mp->b_next = NULL; 8704 8705 udp->udp_rcv_cnt -= size; 8706 udp->udp_rcv_msgcnt--; 8707 UDP_STAT(us, udp_rrw_msgcnt); 8708 8709 /* No longer flow-controlling? */ 8710 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 8711 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 8712 udp->udp_drain_qfull = B_FALSE; 8713 } 8714 if (udp->udp_rcv_list_head == NULL) { 8715 /* 8716 * Either we just dequeued the last datagram or 8717 * we get here from sockfs and have nothing to 8718 * return; in this case clear RSLEEP. 8719 */ 8720 ASSERT(udp->udp_rcv_cnt == 0); 8721 ASSERT(udp->udp_rcv_msgcnt == 0); 8722 ASSERT(udp->udp_rcv_list_tail == NULL); 8723 STR_WAKEUP_CLEAR(STREAM(q)); 8724 } else { 8725 /* 8726 * More data follows; we need udp_rrw() to be 8727 * called in future to pick up the rest. 
8728 */ 8729 STR_WAKEUP_SET(STREAM(q)); 8730 } 8731 mutex_exit(&udp->udp_drain_lock); 8732 dp->d_mp = mp; 8733 return (0); 8734 } 8735 8736 /* 8737 * Enqueue a completely-built T_UNITDATA_IND message into the receive 8738 * list; this is typically executed within the interrupt thread context 8739 * and so we do things as quickly as possible. 8740 */ 8741 static void 8742 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 8743 { 8744 ASSERT(q == RD(q)); 8745 ASSERT(pkt_len == msgdsize(mp)); 8746 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 8747 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 8748 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 8749 8750 mutex_enter(&udp->udp_drain_lock); 8751 /* 8752 * Wake up and signal the receiving app; it is okay to do this 8753 * before enqueueing the mp because we are holding the drain lock. 8754 * One of the advantages of synchronous stream is the ability for 8755 * us to find out when the application performs a read on the 8756 * socket by way of udp_rrw() entry point being called. We need 8757 * to generate SIGPOLL/SIGIO for each received data in the case 8758 * of asynchronous socket just as in the strrput() case. However, 8759 * we only wake the application up when necessary, i.e. during the 8760 * first enqueue. When udp_rrw() is called, we send up a single 8761 * datagram upstream and call STR_WAKEUP_SET() again when there 8762 * are still data remaining in our receive queue. 8763 */ 8764 if (udp->udp_rcv_list_head == NULL) { 8765 STR_WAKEUP_SET(STREAM(q)); 8766 udp->udp_rcv_list_head = mp; 8767 } else { 8768 udp->udp_rcv_list_tail->b_next = mp; 8769 } 8770 udp->udp_rcv_list_tail = mp; 8771 udp->udp_rcv_cnt += pkt_len; 8772 udp->udp_rcv_msgcnt++; 8773 8774 /* Need to flow-control? 
*/ 8775 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 8776 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 8777 udp->udp_drain_qfull = B_TRUE; 8778 8779 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 8780 STR_SENDSIG(STREAM(q)); 8781 mutex_exit(&udp->udp_drain_lock); 8782 } 8783 8784 /* 8785 * Drain the contents of receive list to the module upstream; we do 8786 * this during close or when we fallback to the slow mode due to 8787 * sockmod being popped or a module being pushed on top of us. 8788 */ 8789 static void 8790 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 8791 { 8792 mblk_t *mp; 8793 udp_stack_t *us = udp->udp_us; 8794 8795 ASSERT(q == RD(q)); 8796 8797 mutex_enter(&udp->udp_drain_lock); 8798 /* 8799 * There is no race with a concurrent udp_input() sending 8800 * up packets using putnext() after we have cleared the 8801 * udp_direct_sockfs flag but before we have completed 8802 * sending up the packets in udp_rcv_list, since we are 8803 * either a writer or we have quiesced the conn. 8804 */ 8805 udp->udp_direct_sockfs = B_FALSE; 8806 mutex_exit(&udp->udp_drain_lock); 8807 8808 if (udp->udp_rcv_list_head != NULL) 8809 UDP_STAT(us, udp_drain); 8810 8811 /* 8812 * Send up everything via putnext(); note here that we 8813 * don't need the udp_drain_lock to protect us since 8814 * nothing can enter udp_rrw() and that we currently 8815 * have exclusive access to this udp. 
8816 */ 8817 while ((mp = udp->udp_rcv_list_head) != NULL) { 8818 udp->udp_rcv_list_head = mp->b_next; 8819 mp->b_next = NULL; 8820 udp->udp_rcv_cnt -= msgdsize(mp); 8821 udp->udp_rcv_msgcnt--; 8822 if (closing) { 8823 freemsg(mp); 8824 } else { 8825 putnext(q, mp); 8826 } 8827 } 8828 ASSERT(udp->udp_rcv_cnt == 0); 8829 ASSERT(udp->udp_rcv_msgcnt == 0); 8830 ASSERT(udp->udp_rcv_list_head == NULL); 8831 udp->udp_rcv_list_tail = NULL; 8832 udp->udp_drain_qfull = B_FALSE; 8833 } 8834 8835 static size_t 8836 udp_set_rcv_hiwat(udp_t *udp, size_t size) 8837 { 8838 udp_stack_t *us = udp->udp_us; 8839 8840 /* We add a bit of extra buffering */ 8841 size += size >> 1; 8842 if (size > us->us_max_buf) 8843 size = us->us_max_buf; 8844 8845 udp->udp_rcv_hiwat = size; 8846 return (size); 8847 } 8848