1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
25 */
26 /* Copyright (c) 1990 Mentat Inc. */
27
28 #include <sys/sysmacros.h>
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/stropts.h>
32 #include <sys/strlog.h>
33 #include <sys/strsun.h>
34 #define _SUN_TPI_VERSION 2
35 #include <sys/tihdr.h>
36 #include <sys/timod.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/strsubr.h>
40 #include <sys/suntpi.h>
41 #include <sys/xti_inet.h>
42 #include <sys/kmem.h>
43 #include <sys/cred_impl.h>
44 #include <sys/policy.h>
45 #include <sys/priv.h>
46 #include <sys/ucred.h>
47 #include <sys/zone.h>
48
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/sockio.h>
52 #include <sys/vtrace.h>
53 #include <sys/sdt.h>
54 #include <sys/debug.h>
55 #include <sys/isa_defs.h>
56 #include <sys/random.h>
57 #include <netinet/in.h>
58 #include <netinet/ip6.h>
59 #include <netinet/icmp6.h>
60 #include <netinet/udp.h>
61
62 #include <inet/common.h>
63 #include <inet/ip.h>
64 #include <inet/ip_impl.h>
65 #include <inet/ipsec_impl.h>
66 #include <inet/ip6.h>
67 #include <inet/ip_ire.h>
68 #include <inet/ip_if.h>
69 #include <inet/ip_multi.h>
70 #include <inet/ip_ndp.h>
71 #include <inet/proto_set.h>
72 #include <inet/mib2.h>
73 #include <inet/optcom.h>
74 #include <inet/snmpcom.h>
75 #include <inet/kstatcom.h>
76 #include <inet/ipclassifier.h>
77 #include <sys/squeue_impl.h>
78 #include <inet/ipnet.h>
79 #include <sys/ethernet.h>
80
81 #include <sys/tsol/label.h>
82 #include <sys/tsol/tnet.h>
83 #include <rpc/pmap_prot.h>
84
85 #include <inet/udp_impl.h>
86
87 /*
88 * Synchronization notes:
89 *
90 * UDP is MT and uses the usual kernel synchronization primitives. There are 2
91 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
92 * protects the contents of the udp_t. uf_lock protects the address and the
93 * fanout information.
94 * The lock order is conn_lock -> uf_lock.
95 *
96 * The fanout lock uf_lock:
97 * When a UDP endpoint is bound to a local port, it is inserted into
98 * a bind hash list. The list consists of an array of udp_fanout_t buckets.
99 * The size of the array is controlled by the udp_bind_fanout_size variable.
100 * This variable can be changed in /etc/system if the default value is
101 * not large enough. Each bind hash bucket is protected by a per bucket
102 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
103 * structure and a few other fields in the udp_t. A UDP endpoint is removed
104 * from the bind hash list only when it is being unbound or being closed.
105 * The per bucket lock also protects a UDP endpoint's state changes.
106 *
107 * Plumbing notes:
108 * UDP is always a device driver. For compatibility with mibopen() code
109 * it is possible to I_PUSH "udp", but that results in pushing a passthrough
110 * dummy module.
111 *
112 * The above implies that we don't support any intermediate module to
113 * reside in between /dev/ip and udp -- in fact, we never supported such
114 * scenario in the past as the inter-layer communication semantics have
115 * always been private.
116 */
117
118 /* For /etc/system control */
119 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
120
121 static void udp_addr_req(queue_t *q, mblk_t *mp);
122 static void udp_tpi_bind(queue_t *q, mblk_t *mp);
123 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
124 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
125 static int udp_build_hdr_template(conn_t *, const in6_addr_t *,
126 const in6_addr_t *, in_port_t, uint32_t);
127 static void udp_capability_req(queue_t *q, mblk_t *mp);
128 static int udp_tpi_close(queue_t *q, int flags);
129 static void udp_close_free(conn_t *);
130 static void udp_tpi_connect(queue_t *q, mblk_t *mp);
131 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp);
132 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
133 int sys_error);
134 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
135 t_scalar_t tlierr, int sys_error);
136 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
137 cred_t *cr);
138 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
139 char *value, caddr_t cp, cred_t *cr);
140 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
141 char *value, caddr_t cp, cred_t *cr);
142 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
143 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
144 ip_recv_attr_t *ira);
145 static void udp_info_req(queue_t *q, mblk_t *mp);
146 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
147 static void udp_lrput(queue_t *, mblk_t *);
148 static void udp_lwput(queue_t *, mblk_t *);
149 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
150 cred_t *credp, boolean_t isv6);
151 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
152 cred_t *credp);
153 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
154 cred_t *credp);
155 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
156 int udp_opt_set(conn_t *connp, uint_t optset_context,
157 int level, int name, uint_t inlen,
158 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
159 void *thisdg_attrs, cred_t *cr);
160 int udp_opt_get(conn_t *connp, int level, int name,
161 uchar_t *ptr);
162 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
163 pid_t pid);
164 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
165 pid_t pid, ip_xmit_attr_t *ixa);
166 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
167 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
168 ip_xmit_attr_t *ixa);
169 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
170 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
171 int *);
172 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
173 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
174 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
175 static void udp_ud_err_connected(conn_t *, t_scalar_t);
176 static void udp_tpi_unbind(queue_t *q, mblk_t *mp);
177 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
178 boolean_t random);
179 static void udp_wput_other(queue_t *q, mblk_t *mp);
180 static void udp_wput_iocdata(queue_t *q, mblk_t *mp);
181 static void udp_wput_fallback(queue_t *q, mblk_t *mp);
182 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size);
183
184 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns);
185 static void udp_stack_fini(netstackid_t stackid, void *arg);
186
187 /* Common routines for TPI and socket module */
188 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
189
190 /* Common routine for TPI and socket module */
191 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *);
192 static void udp_do_close(conn_t *);
193 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
194 boolean_t);
195 static int udp_do_unbind(conn_t *);
196
197 int udp_getsockname(sock_lower_handle_t,
198 struct sockaddr *, socklen_t *, cred_t *);
199 int udp_getpeername(sock_lower_handle_t,
200 struct sockaddr *, socklen_t *, cred_t *);
201 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
202 cred_t *, pid_t);
203
204 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
205
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_newdst().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * The macro behaves like a function with this signature:
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 *
 * err must be an lvalue: it is first set to 0 and then overwritten with
 * the value returned by the hook.  cp and err are expanded more than once,
 * so pass expressions without side effects.
 *
 * The expansion is a brace-enclosed block, so it must not be used as the
 * unbraced body of an if that has an else.
 */
#define	CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \
	(err) = 0; \
	/* \
	 * Running in cluster mode - check and register active \
	 * "connection" information \
	 */ \
	if ((cp)->conn_ipversion == IPV4_VERSION) \
		(err) = (*cl_inet_connect2)( \
		    (cp)->conn_netstack->netstack_stackid, \
		    IPPROTO_UDP, (is_outgoing), AF_INET, \
		    (uint8_t *)&((cp)->conn_laddr_v4), \
		    (cp)->conn_lport, \
		    (uint8_t *)&(V4_PART_OF_V6((*(faddrp)))), \
		    (in_port_t)(fport), NULL); \
	else \
		(err) = (*cl_inet_connect2)( \
		    (cp)->conn_netstack->netstack_stackid, \
		    IPPROTO_UDP, (is_outgoing), AF_INET6, \
		    (uint8_t *)&((cp)->conn_laddr_v6), \
		    (cp)->conn_lport, \
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \
}
243
244 static struct module_info udp_mod_info = {
245 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
246 };
247
248 /*
249 * Entry points for UDP as a device.
250 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
251 */
252 static struct qinit udp_rinitv4 = {
253 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
254 };
255
256 static struct qinit udp_rinitv6 = {
257 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
258 };
259
260 static struct qinit udp_winit = {
261 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info
262 };
263
264 /* UDP entry point during fallback */
265 struct qinit udp_fallback_sock_winit = {
266 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
267 };
268
269 /*
270 * UDP needs to handle I_LINK and I_PLINK since ifconfig
271 * likes to use it as a place to hang the various streams.
272 */
273 static struct qinit udp_lrinit = {
274 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
275 };
276
277 static struct qinit udp_lwinit = {
278 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
279 };
280
281 /* For AF_INET aka /dev/udp */
282 struct streamtab udpinfov4 = {
283 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
284 };
285
286 /* For AF_INET6 aka /dev/udp6 */
287 struct streamtab udpinfov6 = {
288 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
289 };
290
291 #define REUSELIST_MAX 64
292 struct reuselist {
293 conn_t *ru_conns[REUSELIST_MAX];
294 int ru_entries; /* number of entries */
295 int ru_next; /* round-robin pointer */
296 kmutex_t ru_lock;
297 };
298
299 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
300
301 /* Default structure copied into T_INFO_ACK messages */
302 static struct T_info_ack udp_g_t_info_ack_ipv4 = {
303 T_INFO_ACK,
304 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */
305 T_INVALID, /* ETSU_size. udp does not support expedited data. */
306 T_INVALID, /* CDATA_size. udp does not support connect data. */
307 T_INVALID, /* DDATA_size. udp does not support disconnect data. */
308 sizeof (sin_t), /* ADDR_size. */
309 0, /* OPT_size - not initialized here */
310 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */
311 T_CLTS, /* SERV_type. udp supports connection-less. */
312 TS_UNBND, /* CURRENT_state. This is set from udp_state. */
313 (XPG4_1|SENDZERO) /* PROVIDER_flag */
314 };
315
316 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
317
318 static struct T_info_ack udp_g_t_info_ack_ipv6 = {
319 T_INFO_ACK,
320 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */
321 T_INVALID, /* ETSU_size. udp does not support expedited data. */
322 T_INVALID, /* CDATA_size. udp does not support connect data. */
323 T_INVALID, /* DDATA_size. udp does not support disconnect data. */
324 sizeof (sin6_t), /* ADDR_size. */
325 0, /* OPT_size - not initialized here */
326 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */
327 T_CLTS, /* SERV_type. udp supports connection-less. */
328 TS_UNBND, /* CURRENT_state. This is set from udp_state. */
329 (XPG4_1|SENDZERO) /* PROVIDER_flag */
330 };
331
332 /*
333 * UDP tunables related declarations. Definitions are in udp_tunables.c
334 */
335 extern mod_prop_info_t udp_propinfo_tbl[];
336 extern int udp_propinfo_count;
337
/* Settable in /etc/system */
339 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
340 uint32_t udp_random_anon_port = 1;
341
342 /*
343 * Hook functions to enable cluster networking.
344 * On non-clustered systems these vectors must always be NULL
345 */
346
347 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
348 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
349 void *args) = NULL;
350 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
351 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
352 void *args) = NULL;
353
354 typedef union T_primitives *t_primp_t;
355
356 static int
udp_reuselist_add(struct reuselist * reusep,conn_t * connp,boolean_t last)357 udp_reuselist_add(struct reuselist *reusep, conn_t *connp, boolean_t last)
358 {
359 /*
360 * we don't need to operate under the mutex here, because it is not
361 * in use yet
362 */
363
364 /*
365 * check if the table is full. If last is zero, it can't be filled
366 * up by design, just assert it. Otherwise check and return an error
367 */
368 ASSERT(last || reusep->ru_entries < REUSELIST_MAX);
369
370 if (last && reusep->ru_entries == REUSELIST_MAX)
371 return -1;
372
373 reusep->ru_conns[reusep->ru_entries++] = connp;
374
375 return 0;
376 }
377
378 static void
udp_reuselist_remove(conn_t * connp)379 udp_reuselist_remove(conn_t *connp)
380 {
381 int i;
382 struct reuselist *reusep = connp->conn_reuselist;
383
384 if (reusep == NULL)
385 return;
386
387 mutex_enter(&reusep->ru_lock);
388
389 for (i = 0; i < reusep->ru_entries; ++i) {
390 if (reusep->ru_conns[i] == connp)
391 break;
392 }
393 ASSERT(i < reusep->ru_entries);
394
395 /* move last entry into freed slot */
396 if (--reusep->ru_entries == 0) {
397 /* last entry, free list */
398 mutex_exit(&reusep->ru_lock);
399 mutex_destroy(&reusep->ru_lock);
400 kmem_free(reusep, sizeof (*reusep));
401 connp->conn_reuselist = NULL;
402 } else {
403 reusep->ru_conns[i] = reusep->ru_conns[reusep->ru_entries];
404
405 /*
406 * reset round-robin pointer, so it doesn't accidentally point
407 * to the last entry
408 */
409 reusep->ru_next = 0;
410 mutex_exit(&reusep->ru_lock);
411 }
412 }
413
414 /*
415 * Return the next anonymous port in the privileged port range for
416 * bind checking.
417 *
418 * Trusted Extension (TX) notes: TX allows administrator to mark or
419 * reserve ports as Multilevel ports (MLP). MLP has special function
420 * on TX systems. Once a port is made MLP, it's not available as
421 * ordinary port. This creates "holes" in the port name space. It
422 * may be necessary to skip the "holes" find a suitable anon port.
423 */
424 static in_port_t
udp_get_next_priv_port(udp_t * udp)425 udp_get_next_priv_port(udp_t *udp)
426 {
427 static in_port_t next_priv_port = IPPORT_RESERVED - 1;
428 in_port_t nextport;
429 boolean_t restart = B_FALSE;
430 udp_stack_t *us = udp->udp_us;
431
432 retry:
433 if (next_priv_port < us->us_min_anonpriv_port ||
434 next_priv_port >= IPPORT_RESERVED) {
435 next_priv_port = IPPORT_RESERVED - 1;
436 if (restart)
437 return (0);
438 restart = B_TRUE;
439 }
440
441 if (is_system_labeled() &&
442 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
443 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
444 next_priv_port = nextport;
445 goto retry;
446 }
447
448 return (next_priv_port--);
449 }
450
451 /*
452 * Hash list removal routine for udp_t structures.
453 */
454 static void
udp_bind_hash_remove(udp_t * udp,boolean_t caller_holds_lock)455 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
456 {
457 udp_t *udpnext;
458 kmutex_t *lockp;
459 udp_stack_t *us = udp->udp_us;
460 conn_t *connp = udp->udp_connp;
461
462 if (udp->udp_ptpbhn == NULL)
463 return;
464
465 /*
466 * Extract the lock pointer in case there are concurrent
467 * hash_remove's for this instance.
468 */
469 ASSERT(connp->conn_lport != 0);
470 if (!caller_holds_lock) {
471 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
472 us->us_bind_fanout_size)].uf_lock;
473 ASSERT(lockp != NULL);
474 mutex_enter(lockp);
475 }
476 if (udp->udp_ptpbhn != NULL) {
477 udpnext = udp->udp_bind_hash;
478 if (udpnext != NULL) {
479 udpnext->udp_ptpbhn = udp->udp_ptpbhn;
480 udp->udp_bind_hash = NULL;
481 }
482 *udp->udp_ptpbhn = udpnext;
483 udp->udp_ptpbhn = NULL;
484 }
485 if (!caller_holds_lock) {
486 mutex_exit(lockp);
487 }
488 }
489
490 static void
udp_bind_hash_insert(udp_fanout_t * uf,udp_t * udp)491 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
492 {
493 conn_t *connp = udp->udp_connp;
494 udp_t **udpp;
495 udp_t *udpnext;
496 conn_t *connext;
497
498 ASSERT(MUTEX_HELD(&uf->uf_lock));
499 ASSERT(udp->udp_ptpbhn == NULL);
500 udpp = &uf->uf_udp;
501 udpnext = udpp[0];
502 if (udpnext != NULL) {
503 /*
504 * If the new udp bound to the INADDR_ANY address
505 * and the first one in the list is not bound to
506 * INADDR_ANY we skip all entries until we find the
507 * first one bound to INADDR_ANY.
508 * This makes sure that applications binding to a
509 * specific address get preference over those binding to
510 * INADDR_ANY.
511 */
512 connext = udpnext->udp_connp;
513 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
514 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
515 while ((udpnext = udpp[0]) != NULL &&
516 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
517 udpp = &(udpnext->udp_bind_hash);
518 }
519 if (udpnext != NULL)
520 udpnext->udp_ptpbhn = &udp->udp_bind_hash;
521 } else {
522 udpnext->udp_ptpbhn = &udp->udp_bind_hash;
523 }
524 }
525 udp->udp_bind_hash = udpnext;
526 udp->udp_ptpbhn = udpp;
527 udpp[0] = udp;
528 }
529
530 /*
531 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
532 * passed to udp_wput.
533 * It associates a port number and local address with the stream.
534 * It calls IP to verify the local IP address, and calls IP to insert
535 * the conn_t in the fanout table.
536 * If everything is ok it then sends the T_BIND_ACK back up.
537 *
538 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
539 * without setting SO_REUSEADDR. This is needed so that they
540 * can be viewed as two independent transport protocols.
541 * However, anonymouns ports are allocated from the same range to avoid
542 * duplicating the us->us_next_port_to_try.
543 */
544 static void
udp_tpi_bind(queue_t * q,mblk_t * mp)545 udp_tpi_bind(queue_t *q, mblk_t *mp)
546 {
547 sin_t *sin;
548 sin6_t *sin6;
549 mblk_t *mp1;
550 struct T_bind_req *tbr;
551 conn_t *connp;
552 udp_t *udp;
553 int error;
554 struct sockaddr *sa;
555 cred_t *cr;
556
557 /*
558 * All Solaris components should pass a db_credp
559 * for this TPI message, hence we ASSERT.
560 * But in case there is some other M_PROTO that looks
561 * like a TPI message sent by some other kernel
562 * component, we check and return an error.
563 */
564 cr = msg_getcred(mp, NULL);
565 ASSERT(cr != NULL);
566 if (cr == NULL) {
567 udp_err_ack(q, mp, TSYSERR, EINVAL);
568 return;
569 }
570
571 connp = Q_TO_CONN(q);
572 udp = connp->conn_udp;
573 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
574 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
575 "udp_bind: bad req, len %u",
576 (uint_t)(mp->b_wptr - mp->b_rptr));
577 udp_err_ack(q, mp, TPROTO, 0);
578 return;
579 }
580 if (udp->udp_state != TS_UNBND) {
581 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
582 "udp_bind: bad state, %u", udp->udp_state);
583 udp_err_ack(q, mp, TOUTSTATE, 0);
584 return;
585 }
586 /*
587 * Reallocate the message to make sure we have enough room for an
588 * address.
589 */
590 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
591 if (mp1 == NULL) {
592 udp_err_ack(q, mp, TSYSERR, ENOMEM);
593 return;
594 }
595
596 mp = mp1;
597
598 /* Reset the message type in preparation for shipping it back. */
599 DB_TYPE(mp) = M_PCPROTO;
600
601 tbr = (struct T_bind_req *)mp->b_rptr;
602 switch (tbr->ADDR_length) {
603 case 0: /* Request for a generic port */
604 tbr->ADDR_offset = sizeof (struct T_bind_req);
605 if (connp->conn_family == AF_INET) {
606 tbr->ADDR_length = sizeof (sin_t);
607 sin = (sin_t *)&tbr[1];
608 *sin = sin_null;
609 sin->sin_family = AF_INET;
610 mp->b_wptr = (uchar_t *)&sin[1];
611 sa = (struct sockaddr *)sin;
612 } else {
613 ASSERT(connp->conn_family == AF_INET6);
614 tbr->ADDR_length = sizeof (sin6_t);
615 sin6 = (sin6_t *)&tbr[1];
616 *sin6 = sin6_null;
617 sin6->sin6_family = AF_INET6;
618 mp->b_wptr = (uchar_t *)&sin6[1];
619 sa = (struct sockaddr *)sin6;
620 }
621 break;
622
623 case sizeof (sin_t): /* Complete IPv4 address */
624 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
625 sizeof (sin_t));
626 if (sa == NULL || !OK_32PTR((char *)sa)) {
627 udp_err_ack(q, mp, TSYSERR, EINVAL);
628 return;
629 }
630 if (connp->conn_family != AF_INET ||
631 sa->sa_family != AF_INET) {
632 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
633 return;
634 }
635 break;
636
637 case sizeof (sin6_t): /* complete IPv6 address */
638 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
639 sizeof (sin6_t));
640 if (sa == NULL || !OK_32PTR((char *)sa)) {
641 udp_err_ack(q, mp, TSYSERR, EINVAL);
642 return;
643 }
644 if (connp->conn_family != AF_INET6 ||
645 sa->sa_family != AF_INET6) {
646 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
647 return;
648 }
649 break;
650
651 default: /* Invalid request */
652 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
653 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
654 udp_err_ack(q, mp, TBADADDR, 0);
655 return;
656 }
657
658 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
659 tbr->PRIM_type != O_T_BIND_REQ);
660
661 if (error != 0) {
662 if (error > 0) {
663 udp_err_ack(q, mp, TSYSERR, error);
664 } else {
665 udp_err_ack(q, mp, -error, 0);
666 }
667 } else {
668 tbr->PRIM_type = T_BIND_ACK;
669 qreply(q, mp);
670 }
671 }
672
673 /*
674 * This routine handles each T_CONN_REQ message passed to udp. It
675 * associates a default destination address with the stream.
676 *
677 * After various error checks are completed, udp_connect() lays
678 * the target address and port into the composite header template.
679 * Then we ask IP for information, including a source address if we didn't
680 * already have one. Finally we send up the T_OK_ACK reply message.
681 */
682 static void
udp_tpi_connect(queue_t * q,mblk_t * mp)683 udp_tpi_connect(queue_t *q, mblk_t *mp)
684 {
685 conn_t *connp = Q_TO_CONN(q);
686 int error;
687 socklen_t len;
688 struct sockaddr *sa;
689 struct T_conn_req *tcr;
690 cred_t *cr;
691 pid_t pid;
692 /*
693 * All Solaris components should pass a db_credp
694 * for this TPI message, hence we ASSERT.
695 * But in case there is some other M_PROTO that looks
696 * like a TPI message sent by some other kernel
697 * component, we check and return an error.
698 */
699 cr = msg_getcred(mp, &pid);
700 ASSERT(cr != NULL);
701 if (cr == NULL) {
702 udp_err_ack(q, mp, TSYSERR, EINVAL);
703 return;
704 }
705
706 tcr = (struct T_conn_req *)mp->b_rptr;
707
708 /* A bit of sanity checking */
709 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
710 udp_err_ack(q, mp, TPROTO, 0);
711 return;
712 }
713
714 if (tcr->OPT_length != 0) {
715 udp_err_ack(q, mp, TBADOPT, 0);
716 return;
717 }
718
719 /*
720 * Determine packet type based on type of address passed in
721 * the request should contain an IPv4 or IPv6 address.
722 * Make sure that address family matches the type of
723 * family of the address passed down.
724 */
725 len = tcr->DEST_length;
726 switch (tcr->DEST_length) {
727 default:
728 udp_err_ack(q, mp, TBADADDR, 0);
729 return;
730
731 case sizeof (sin_t):
732 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
733 sizeof (sin_t));
734 break;
735
736 case sizeof (sin6_t):
737 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
738 sizeof (sin6_t));
739 break;
740 }
741
742 error = proto_verify_ip_addr(connp->conn_family, sa, len);
743 if (error != 0) {
744 udp_err_ack(q, mp, TSYSERR, error);
745 return;
746 }
747
748 error = udp_do_connect(connp, sa, len, cr, pid);
749 if (error != 0) {
750 if (error < 0)
751 udp_err_ack(q, mp, -error, 0);
752 else
753 udp_err_ack(q, mp, TSYSERR, error);
754 } else {
755 mblk_t *mp1;
756 /*
757 * We have to send a connection confirmation to
758 * keep TLI happy.
759 */
760 if (connp->conn_family == AF_INET) {
761 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
762 sizeof (sin_t), NULL, 0);
763 } else {
764 mp1 = mi_tpi_conn_con(NULL, (char *)sa,
765 sizeof (sin6_t), NULL, 0);
766 }
767 if (mp1 == NULL) {
768 udp_err_ack(q, mp, TSYSERR, ENOMEM);
769 return;
770 }
771
772 /*
773 * Send ok_ack for T_CONN_REQ
774 */
775 mp = mi_tpi_ok_ack_alloc(mp);
776 if (mp == NULL) {
777 /* Unable to reuse the T_CONN_REQ for the ack. */
778 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
779 return;
780 }
781
782 putnext(connp->conn_rq, mp);
783 putnext(connp->conn_rq, mp1);
784 }
785 }
786
787 static int
udp_tpi_close(queue_t * q,int flags)788 udp_tpi_close(queue_t *q, int flags)
789 {
790 conn_t *connp;
791
792 if (flags & SO_FALLBACK) {
793 /*
794 * stream is being closed while in fallback
795 * simply free the resources that were allocated
796 */
797 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
798 qprocsoff(q);
799 goto done;
800 }
801
802 connp = Q_TO_CONN(q);
803 udp_do_close(connp);
804 done:
805 q->q_ptr = WR(q)->q_ptr = NULL;
806 return (0);
807 }
808
809 static void
udp_close_free(conn_t * connp)810 udp_close_free(conn_t *connp)
811 {
812 udp_t *udp = connp->conn_udp;
813
814 /* If there are any options associated with the stream, free them. */
815 if (udp->udp_recv_ipp.ipp_fields != 0)
816 ip_pkt_free(&udp->udp_recv_ipp);
817
818 /*
819 * Clear any fields which the kmem_cache constructor clears.
820 * Only udp_connp needs to be preserved.
821 * TBD: We should make this more efficient to avoid clearing
822 * everything.
823 */
824 ASSERT(udp->udp_connp == connp);
825 bzero(udp, sizeof (udp_t));
826 udp->udp_connp = connp;
827 }
828
829 static int
udp_do_disconnect(conn_t * connp)830 udp_do_disconnect(conn_t *connp)
831 {
832 udp_t *udp;
833 udp_fanout_t *udpf;
834 udp_stack_t *us;
835 int error;
836
837 udp = connp->conn_udp;
838 us = udp->udp_us;
839 mutex_enter(&connp->conn_lock);
840 if (udp->udp_state != TS_DATA_XFER) {
841 mutex_exit(&connp->conn_lock);
842 return (-TOUTSTATE);
843 }
844 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
845 us->us_bind_fanout_size)];
846 mutex_enter(&udpf->uf_lock);
847 if (connp->conn_mcbc_bind)
848 connp->conn_saddr_v6 = ipv6_all_zeros;
849 else
850 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
851 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
852 connp->conn_faddr_v6 = ipv6_all_zeros;
853 connp->conn_fport = 0;
854 udp->udp_state = TS_IDLE;
855 mutex_exit(&udpf->uf_lock);
856
857 /* Remove any remnants of mapped address binding */
858 if (connp->conn_family == AF_INET6)
859 connp->conn_ipversion = IPV6_VERSION;
860
861 connp->conn_v6lastdst = ipv6_all_zeros;
862 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
863 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
864 mutex_exit(&connp->conn_lock);
865 if (error != 0)
866 return (error);
867
868 /*
869 * Tell IP to remove the full binding and revert
870 * to the local address binding.
871 */
872 return (ip_laddr_fanout_insert(connp));
873 }
874
875 static void
udp_tpi_disconnect(queue_t * q,mblk_t * mp)876 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
877 {
878 conn_t *connp = Q_TO_CONN(q);
879 int error;
880
881 /*
882 * Allocate the largest primitive we need to send back
883 * T_error_ack is > than T_ok_ack
884 */
885 mp = reallocb(mp, sizeof (struct T_error_ack), 1);
886 if (mp == NULL) {
887 /* Unable to reuse the T_DISCON_REQ for the ack. */
888 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
889 return;
890 }
891
892 error = udp_do_disconnect(connp);
893
894 if (error != 0) {
895 if (error < 0) {
896 udp_err_ack(q, mp, -error, 0);
897 } else {
898 udp_err_ack(q, mp, TSYSERR, error);
899 }
900 } else {
901 mp = mi_tpi_ok_ack_alloc(mp);
902 ASSERT(mp != NULL);
903 qreply(q, mp);
904 }
905 }
906
907 int
udp_disconnect(conn_t * connp)908 udp_disconnect(conn_t *connp)
909 {
910 int error;
911
912 connp->conn_dgram_errind = B_FALSE;
913 error = udp_do_disconnect(connp);
914 if (error < 0)
915 error = proto_tlitosyserr(-error);
916
917 return (error);
918 }
919
920 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
921 static void
udp_err_ack(queue_t * q,mblk_t * mp,t_scalar_t t_error,int sys_error)922 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
923 {
924 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
925 qreply(q, mp);
926 }
927
928 /* Shorthand to generate and send TPI error acks to our client */
929 static void
udp_err_ack_prim(queue_t * q,mblk_t * mp,t_scalar_t primitive,t_scalar_t t_error,int sys_error)930 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
931 t_scalar_t t_error, int sys_error)
932 {
933 struct T_error_ack *teackp;
934
935 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
936 M_PCPROTO, T_ERROR_ACK)) != NULL) {
937 teackp = (struct T_error_ack *)mp->b_rptr;
938 teackp->ERROR_prim = primitive;
939 teackp->TLI_error = t_error;
940 teackp->UNIX_error = sys_error;
941 qreply(q, mp);
942 }
943 }
944
945 /* At minimum we need 4 bytes of UDP header */
946 #define ICMP_MIN_UDP_HDR 4
947
948 /*
949 * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
950 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
951 * Assumes that IP has pulled up everything up to and including the ICMP header.
952 */
953 /* ARGSUSED2 */
954 static void
udp_icmp_input(void * arg1,mblk_t * mp,void * arg2,ip_recv_attr_t * ira)955 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
956 {
957 conn_t *connp = (conn_t *)arg1;
958 icmph_t *icmph;
959 ipha_t *ipha;
960 int iph_hdr_length;
961 udpha_t *udpha;
962 sin_t sin;
963 sin6_t sin6;
964 mblk_t *mp1;
965 int error = 0;
966 udp_t *udp = connp->conn_udp;
967
968 ipha = (ipha_t *)mp->b_rptr;
969
970 ASSERT(OK_32PTR(mp->b_rptr));
971
972 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
973 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
974 udp_icmp_error_ipv6(connp, mp, ira);
975 return;
976 }
977 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
978
979 /* Skip past the outer IP and ICMP headers */
980 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
981 iph_hdr_length = ira->ira_ip_hdr_length;
982 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
983 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */
984
985 /* Skip past the inner IP and find the ULP header */
986 iph_hdr_length = IPH_HDR_LENGTH(ipha);
987 udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
988
989 switch (icmph->icmph_type) {
990 case ICMP_DEST_UNREACHABLE:
991 switch (icmph->icmph_code) {
992 case ICMP_FRAGMENTATION_NEEDED: {
993 ipha_t *ipha;
994 ip_xmit_attr_t *ixa;
995 /*
996 * IP has already adjusted the path MTU.
997 * But we need to adjust DF for IPv4.
998 */
999 if (connp->conn_ipversion != IPV4_VERSION)
1000 break;
1001
1002 ixa = conn_get_ixa(connp, B_FALSE);
1003 if (ixa == NULL || ixa->ixa_ire == NULL) {
1004 /*
1005 * Some other thread holds conn_ixa. We will
1006 * redo this on the next ICMP too big.
1007 */
1008 if (ixa != NULL)
1009 ixa_refrele(ixa);
1010 break;
1011 }
1012 (void) ip_get_pmtu(ixa);
1013
1014 mutex_enter(&connp->conn_lock);
1015 ipha = (ipha_t *)connp->conn_ht_iphc;
1016 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
1017 ipha->ipha_fragment_offset_and_flags |=
1018 IPH_DF_HTONS;
1019 } else {
1020 ipha->ipha_fragment_offset_and_flags &=
1021 ~IPH_DF_HTONS;
1022 }
1023 mutex_exit(&connp->conn_lock);
1024 ixa_refrele(ixa);
1025 break;
1026 }
1027 case ICMP_PORT_UNREACHABLE:
1028 case ICMP_PROTOCOL_UNREACHABLE:
1029 error = ECONNREFUSED;
1030 break;
1031 default:
1032 /* Transient errors */
1033 break;
1034 }
1035 break;
1036 default:
1037 /* Transient errors */
1038 break;
1039 }
1040 if (error == 0) {
1041 freemsg(mp);
1042 return;
1043 }
1044
1045 /*
1046 * Deliver T_UDERROR_IND when the application has asked for it.
1047 * The socket layer enables this automatically when connected.
1048 */
1049 if (!connp->conn_dgram_errind) {
1050 freemsg(mp);
1051 return;
1052 }
1053
1054 switch (connp->conn_family) {
1055 case AF_INET:
1056 sin = sin_null;
1057 sin.sin_family = AF_INET;
1058 sin.sin_addr.s_addr = ipha->ipha_dst;
1059 sin.sin_port = udpha->uha_dst_port;
1060 if (IPCL_IS_NONSTR(connp)) {
1061 mutex_enter(&connp->conn_lock);
1062 if (udp->udp_state == TS_DATA_XFER) {
1063 if (sin.sin_port == connp->conn_fport &&
1064 sin.sin_addr.s_addr ==
1065 connp->conn_faddr_v4) {
1066 mutex_exit(&connp->conn_lock);
1067 (*connp->conn_upcalls->su_set_error)
1068 (connp->conn_upper_handle, error);
1069 goto done;
1070 }
1071 } else {
1072 udp->udp_delayed_error = error;
1073 *((sin_t *)&udp->udp_delayed_addr) = sin;
1074 }
1075 mutex_exit(&connp->conn_lock);
1076 } else {
1077 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1078 NULL, 0, error);
1079 if (mp1 != NULL)
1080 putnext(connp->conn_rq, mp1);
1081 }
1082 break;
1083 case AF_INET6:
1084 sin6 = sin6_null;
1085 sin6.sin6_family = AF_INET6;
1086 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1087 sin6.sin6_port = udpha->uha_dst_port;
1088 if (IPCL_IS_NONSTR(connp)) {
1089 mutex_enter(&connp->conn_lock);
1090 if (udp->udp_state == TS_DATA_XFER) {
1091 if (sin6.sin6_port == connp->conn_fport &&
1092 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1093 &connp->conn_faddr_v6)) {
1094 mutex_exit(&connp->conn_lock);
1095 (*connp->conn_upcalls->su_set_error)
1096 (connp->conn_upper_handle, error);
1097 goto done;
1098 }
1099 } else {
1100 udp->udp_delayed_error = error;
1101 *((sin6_t *)&udp->udp_delayed_addr) = sin6;
1102 }
1103 mutex_exit(&connp->conn_lock);
1104 } else {
1105 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1106 NULL, 0, error);
1107 if (mp1 != NULL)
1108 putnext(connp->conn_rq, mp1);
1109 }
1110 break;
1111 }
1112 done:
1113 freemsg(mp);
1114 }
1115
1116 /*
1117 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1118 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1119 * Assumes that IP has pulled up all the extension headers as well as the
1120 * ICMPv6 header.
1121 */
1122 static void
udp_icmp_error_ipv6(conn_t * connp,mblk_t * mp,ip_recv_attr_t * ira)1123 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1124 {
1125 icmp6_t *icmp6;
1126 ip6_t *ip6h, *outer_ip6h;
1127 uint16_t iph_hdr_length;
1128 uint8_t *nexthdrp;
1129 udpha_t *udpha;
1130 sin6_t sin6;
1131 mblk_t *mp1;
1132 int error = 0;
1133 udp_t *udp = connp->conn_udp;
1134 udp_stack_t *us = udp->udp_us;
1135
1136 outer_ip6h = (ip6_t *)mp->b_rptr;
1137 #ifdef DEBUG
1138 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1139 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1140 else
1141 iph_hdr_length = IPV6_HDR_LEN;
1142 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1143 #endif
1144 /* Skip past the outer IP and ICMP headers */
1145 iph_hdr_length = ira->ira_ip_hdr_length;
1146 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1147
1148 /* Skip past the inner IP and find the ULP header */
1149 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */
1150 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1151 freemsg(mp);
1152 return;
1153 }
1154 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1155
1156 switch (icmp6->icmp6_type) {
1157 case ICMP6_DST_UNREACH:
1158 switch (icmp6->icmp6_code) {
1159 case ICMP6_DST_UNREACH_NOPORT:
1160 error = ECONNREFUSED;
1161 break;
1162 case ICMP6_DST_UNREACH_ADMIN:
1163 case ICMP6_DST_UNREACH_NOROUTE:
1164 case ICMP6_DST_UNREACH_BEYONDSCOPE:
1165 case ICMP6_DST_UNREACH_ADDR:
1166 /* Transient errors */
1167 break;
1168 default:
1169 break;
1170 }
1171 break;
1172 case ICMP6_PACKET_TOO_BIG: {
1173 struct T_unitdata_ind *tudi;
1174 struct T_opthdr *toh;
1175 size_t udi_size;
1176 mblk_t *newmp;
1177 t_scalar_t opt_length = sizeof (struct T_opthdr) +
1178 sizeof (struct ip6_mtuinfo);
1179 sin6_t *sin6;
1180 struct ip6_mtuinfo *mtuinfo;
1181
1182 /*
1183 * If the application has requested to receive path mtu
1184 * information, send up an empty message containing an
1185 * IPV6_PATHMTU ancillary data item.
1186 */
1187 if (!connp->conn_ipv6_recvpathmtu)
1188 break;
1189
1190 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1191 opt_length;
1192 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1193 UDPS_BUMP_MIB(us, udpInErrors);
1194 break;
1195 }
1196
1197 /*
1198 * newmp->b_cont is left to NULL on purpose. This is an
1199 * empty message containing only ancillary data.
1200 */
1201 newmp->b_datap->db_type = M_PROTO;
1202 tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1203 newmp->b_wptr = (uchar_t *)tudi + udi_size;
1204 tudi->PRIM_type = T_UNITDATA_IND;
1205 tudi->SRC_length = sizeof (sin6_t);
1206 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1207 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1208 tudi->OPT_length = opt_length;
1209
1210 sin6 = (sin6_t *)&tudi[1];
1211 bzero(sin6, sizeof (sin6_t));
1212 sin6->sin6_family = AF_INET6;
1213 sin6->sin6_addr = connp->conn_faddr_v6;
1214
1215 toh = (struct T_opthdr *)&sin6[1];
1216 toh->level = IPPROTO_IPV6;
1217 toh->name = IPV6_PATHMTU;
1218 toh->len = opt_length;
1219 toh->status = 0;
1220
1221 mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1222 bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1223 mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1224 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1225 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1226 /*
1227 * We've consumed everything we need from the original
1228 * message. Free it, then send our empty message.
1229 */
1230 freemsg(mp);
1231 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
1232 return;
1233 }
1234 case ICMP6_TIME_EXCEEDED:
1235 /* Transient errors */
1236 break;
1237 case ICMP6_PARAM_PROB:
1238 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1239 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1240 (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1241 (uchar_t *)nexthdrp) {
1242 error = ECONNREFUSED;
1243 break;
1244 }
1245 break;
1246 }
1247 if (error == 0) {
1248 freemsg(mp);
1249 return;
1250 }
1251
1252 /*
1253 * Deliver T_UDERROR_IND when the application has asked for it.
1254 * The socket layer enables this automatically when connected.
1255 */
1256 if (!connp->conn_dgram_errind) {
1257 freemsg(mp);
1258 return;
1259 }
1260
1261 sin6 = sin6_null;
1262 sin6.sin6_family = AF_INET6;
1263 sin6.sin6_addr = ip6h->ip6_dst;
1264 sin6.sin6_port = udpha->uha_dst_port;
1265 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1266
1267 if (IPCL_IS_NONSTR(connp)) {
1268 mutex_enter(&connp->conn_lock);
1269 if (udp->udp_state == TS_DATA_XFER) {
1270 if (sin6.sin6_port == connp->conn_fport &&
1271 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1272 &connp->conn_faddr_v6)) {
1273 mutex_exit(&connp->conn_lock);
1274 (*connp->conn_upcalls->su_set_error)
1275 (connp->conn_upper_handle, error);
1276 goto done;
1277 }
1278 } else {
1279 udp->udp_delayed_error = error;
1280 *((sin6_t *)&udp->udp_delayed_addr) = sin6;
1281 }
1282 mutex_exit(&connp->conn_lock);
1283 } else {
1284 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1285 NULL, 0, error);
1286 if (mp1 != NULL)
1287 putnext(connp->conn_rq, mp1);
1288 }
1289 done:
1290 freemsg(mp);
1291 }
1292
1293 /*
1294 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput.
1295 * The local address is filled in if endpoint is bound. The remote address
1296 * is filled in if remote address has been precified ("connected endpoint")
1297 * (The concept of connected CLTS sockets is alien to published TPI
1298 * but we support it anyway).
1299 */
1300 static void
udp_addr_req(queue_t * q,mblk_t * mp)1301 udp_addr_req(queue_t *q, mblk_t *mp)
1302 {
1303 struct sockaddr *sa;
1304 mblk_t *ackmp;
1305 struct T_addr_ack *taa;
1306 udp_t *udp = Q_TO_UDP(q);
1307 conn_t *connp = udp->udp_connp;
1308 uint_t addrlen;
1309
1310 /* Make it large enough for worst case */
1311 ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1312 2 * sizeof (sin6_t), 1);
1313 if (ackmp == NULL) {
1314 udp_err_ack(q, mp, TSYSERR, ENOMEM);
1315 return;
1316 }
1317 taa = (struct T_addr_ack *)ackmp->b_rptr;
1318
1319 bzero(taa, sizeof (struct T_addr_ack));
1320 ackmp->b_wptr = (uchar_t *)&taa[1];
1321
1322 taa->PRIM_type = T_ADDR_ACK;
1323 ackmp->b_datap->db_type = M_PCPROTO;
1324
1325 if (connp->conn_family == AF_INET)
1326 addrlen = sizeof (sin_t);
1327 else
1328 addrlen = sizeof (sin6_t);
1329
1330 mutex_enter(&connp->conn_lock);
1331 /*
1332 * Note: Following code assumes 32 bit alignment of basic
1333 * data structures like sin_t and struct T_addr_ack.
1334 */
1335 if (udp->udp_state != TS_UNBND) {
1336 /*
1337 * Fill in local address first
1338 */
1339 taa->LOCADDR_offset = sizeof (*taa);
1340 taa->LOCADDR_length = addrlen;
1341 sa = (struct sockaddr *)&taa[1];
1342 (void) conn_getsockname(connp, sa, &addrlen);
1343 ackmp->b_wptr += addrlen;
1344 }
1345 if (udp->udp_state == TS_DATA_XFER) {
1346 /*
1347 * connected, fill remote address too
1348 */
1349 taa->REMADDR_length = addrlen;
1350 /* assumed 32-bit alignment */
1351 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1352 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1353 (void) conn_getpeername(connp, sa, &addrlen);
1354 ackmp->b_wptr += addrlen;
1355 }
1356 mutex_exit(&connp->conn_lock);
1357 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1358 qreply(q, ackmp);
1359 }
1360
1361 static void
udp_copy_info(struct T_info_ack * tap,udp_t * udp)1362 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1363 {
1364 conn_t *connp = udp->udp_connp;
1365
1366 if (connp->conn_family == AF_INET) {
1367 *tap = udp_g_t_info_ack_ipv4;
1368 } else {
1369 *tap = udp_g_t_info_ack_ipv6;
1370 }
1371 tap->CURRENT_state = udp->udp_state;
1372 tap->OPT_size = udp_max_optsize;
1373 }
1374
1375 static void
udp_do_capability_ack(udp_t * udp,struct T_capability_ack * tcap,t_uscalar_t cap_bits1)1376 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1377 t_uscalar_t cap_bits1)
1378 {
1379 tcap->CAP_bits1 = 0;
1380
1381 if (cap_bits1 & TC1_INFO) {
1382 udp_copy_info(&tcap->INFO_ack, udp);
1383 tcap->CAP_bits1 |= TC1_INFO;
1384 }
1385 }
1386
1387 /*
1388 * This routine responds to T_CAPABILITY_REQ messages. It is called by
1389 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from
1390 * udp_g_t_info_ack. The current state of the stream is copied from
1391 * udp_state.
1392 */
1393 static void
udp_capability_req(queue_t * q,mblk_t * mp)1394 udp_capability_req(queue_t *q, mblk_t *mp)
1395 {
1396 t_uscalar_t cap_bits1;
1397 struct T_capability_ack *tcap;
1398 udp_t *udp = Q_TO_UDP(q);
1399
1400 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1401
1402 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1403 mp->b_datap->db_type, T_CAPABILITY_ACK);
1404 if (!mp)
1405 return;
1406
1407 tcap = (struct T_capability_ack *)mp->b_rptr;
1408 udp_do_capability_ack(udp, tcap, cap_bits1);
1409
1410 qreply(q, mp);
1411 }
1412
1413 /*
1414 * This routine responds to T_INFO_REQ messages. It is called by udp_wput.
1415 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1416 * The current state of the stream is copied from udp_state.
1417 */
1418 static void
udp_info_req(queue_t * q,mblk_t * mp)1419 udp_info_req(queue_t *q, mblk_t *mp)
1420 {
1421 udp_t *udp = Q_TO_UDP(q);
1422
1423 /* Create a T_INFO_ACK message. */
1424 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1425 T_INFO_ACK);
1426 if (!mp)
1427 return;
1428 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1429 qreply(q, mp);
1430 }
1431
1432 /* For /dev/udp aka AF_INET open */
1433 static int
udp_openv4(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp)1434 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1435 {
1436 return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1437 }
1438
1439 /* For /dev/udp6 aka AF_INET6 open */
1440 static int
udp_openv6(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp)1441 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1442 {
1443 return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1444 }
1445
1446 /*
1447 * This is the open routine for udp. It allocates a udp_t structure for
1448 * the stream and, on the first open of the module, creates an ND table.
1449 */
1450 static int
udp_open(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp,boolean_t isv6)1451 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1452 boolean_t isv6)
1453 {
1454 udp_t *udp;
1455 conn_t *connp;
1456 dev_t conn_dev;
1457 vmem_t *minor_arena;
1458 int err;
1459
1460 /* If the stream is already open, return immediately. */
1461 if (q->q_ptr != NULL)
1462 return (0);
1463
1464 if (sflag == MODOPEN)
1465 return (EINVAL);
1466
1467 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1468 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1469 minor_arena = ip_minor_arena_la;
1470 } else {
1471 /*
1472 * Either minor numbers in the large arena were exhausted
1473 * or a non socket application is doing the open.
1474 * Try to allocate from the small arena.
1475 */
1476 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1477 return (EBUSY);
1478
1479 minor_arena = ip_minor_arena_sa;
1480 }
1481
1482 if (flag & SO_FALLBACK) {
1483 /*
1484 * Non streams socket needs a stream to fallback to
1485 */
1486 RD(q)->q_ptr = (void *)conn_dev;
1487 WR(q)->q_qinfo = &udp_fallback_sock_winit;
1488 WR(q)->q_ptr = (void *)minor_arena;
1489 qprocson(q);
1490 return (0);
1491 }
1492
1493 connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1494 if (connp == NULL) {
1495 inet_minor_free(minor_arena, conn_dev);
1496 return (err);
1497 }
1498 udp = connp->conn_udp;
1499
1500 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1501 connp->conn_dev = conn_dev;
1502 connp->conn_minor_arena = minor_arena;
1503
1504 /*
1505 * Initialize the udp_t structure for this stream.
1506 */
1507 q->q_ptr = connp;
1508 WR(q)->q_ptr = connp;
1509 connp->conn_rq = q;
1510 connp->conn_wq = WR(q);
1511
1512 /*
1513 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1514 * need to lock anything.
1515 */
1516 ASSERT(connp->conn_proto == IPPROTO_UDP);
1517 ASSERT(connp->conn_udp == udp);
1518 ASSERT(udp->udp_connp == connp);
1519
1520 if (flag & SO_SOCKSTR) {
1521 udp->udp_issocket = B_TRUE;
1522 }
1523
1524 WR(q)->q_hiwat = connp->conn_sndbuf;
1525 WR(q)->q_lowat = connp->conn_sndlowat;
1526
1527 qprocson(q);
1528
1529 /* Set the Stream head write offset and high watermark. */
1530 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1531 (void) proto_set_rx_hiwat(q, connp,
1532 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1533
1534 mutex_enter(&connp->conn_lock);
1535 connp->conn_state_flags &= ~CONN_INCIPIENT;
1536 mutex_exit(&connp->conn_lock);
1537 return (0);
1538 }
1539
1540 /*
1541 * Which UDP options OK to set through T_UNITDATA_REQ...
1542 */
1543 /* ARGSUSED */
1544 static boolean_t
udp_opt_allow_udr_set(t_scalar_t level,t_scalar_t name)1545 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1546 {
1547 return (B_TRUE);
1548 }
1549
1550 /*
1551 * This routine gets default values of certain options whose default
1552 * values are maintained by protcol specific code
1553 */
1554 int
udp_opt_default(queue_t * q,t_scalar_t level,t_scalar_t name,uchar_t * ptr)1555 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1556 {
1557 udp_t *udp = Q_TO_UDP(q);
1558 udp_stack_t *us = udp->udp_us;
1559 int *i1 = (int *)ptr;
1560
1561 switch (level) {
1562 case IPPROTO_IP:
1563 switch (name) {
1564 case IP_MULTICAST_TTL:
1565 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1566 return (sizeof (uchar_t));
1567 case IP_MULTICAST_LOOP:
1568 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1569 return (sizeof (uchar_t));
1570 }
1571 break;
1572 case IPPROTO_IPV6:
1573 switch (name) {
1574 case IPV6_MULTICAST_HOPS:
1575 *i1 = IP_DEFAULT_MULTICAST_TTL;
1576 return (sizeof (int));
1577 case IPV6_MULTICAST_LOOP:
1578 *i1 = IP_DEFAULT_MULTICAST_LOOP;
1579 return (sizeof (int));
1580 case IPV6_UNICAST_HOPS:
1581 *i1 = us->us_ipv6_hoplimit;
1582 return (sizeof (int));
1583 }
1584 break;
1585 }
1586 return (-1);
1587 }
1588
1589 /*
1590 * This routine retrieves the current status of socket options.
1591 * It returns the size of the option retrieved, or -1.
1592 */
1593 int
udp_opt_get(conn_t * connp,t_scalar_t level,t_scalar_t name,uchar_t * ptr)1594 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1595 uchar_t *ptr)
1596 {
1597 int *i1 = (int *)ptr;
1598 udp_t *udp = connp->conn_udp;
1599 int len;
1600 conn_opt_arg_t coas;
1601 int retval;
1602
1603 coas.coa_connp = connp;
1604 coas.coa_ixa = connp->conn_ixa;
1605 coas.coa_ipp = &connp->conn_xmit_ipp;
1606 coas.coa_ancillary = B_FALSE;
1607 coas.coa_changed = 0;
1608
1609 /*
1610 * We assume that the optcom framework has checked for the set
1611 * of levels and names that are supported, hence we don't worry
1612 * about rejecting based on that.
1613 * First check for UDP specific handling, then pass to common routine.
1614 */
1615 switch (level) {
1616 case IPPROTO_IP:
1617 /*
1618 * Only allow IPv4 option processing on IPv4 sockets.
1619 */
1620 if (connp->conn_family != AF_INET)
1621 return (-1);
1622
1623 switch (name) {
1624 case IP_OPTIONS:
1625 case T_IP_OPTIONS:
1626 mutex_enter(&connp->conn_lock);
1627 if (!(udp->udp_recv_ipp.ipp_fields &
1628 IPPF_IPV4_OPTIONS)) {
1629 mutex_exit(&connp->conn_lock);
1630 return (0);
1631 }
1632
1633 len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1634 ASSERT(len != 0);
1635 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1636 mutex_exit(&connp->conn_lock);
1637 return (len);
1638 }
1639 break;
1640 case IPPROTO_UDP:
1641 switch (name) {
1642 case UDP_NAT_T_ENDPOINT:
1643 mutex_enter(&connp->conn_lock);
1644 *i1 = udp->udp_nat_t_endpoint;
1645 mutex_exit(&connp->conn_lock);
1646 return (sizeof (int));
1647 case UDP_RCVHDR:
1648 mutex_enter(&connp->conn_lock);
1649 *i1 = udp->udp_rcvhdr ? 1 : 0;
1650 mutex_exit(&connp->conn_lock);
1651 return (sizeof (int));
1652 }
1653 }
1654 mutex_enter(&connp->conn_lock);
1655 retval = conn_opt_get(&coas, level, name, ptr);
1656 mutex_exit(&connp->conn_lock);
1657 return (retval);
1658 }
1659
1660 /*
1661 * This routine retrieves the current status of socket options.
1662 * It returns the size of the option retrieved, or -1.
1663 */
1664 int
udp_tpi_opt_get(queue_t * q,t_scalar_t level,t_scalar_t name,uchar_t * ptr)1665 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1666 {
1667 conn_t *connp = Q_TO_CONN(q);
1668 int err;
1669
1670 err = udp_opt_get(connp, level, name, ptr);
1671 return (err);
1672 }
1673
1674 /*
1675 * This routine sets socket options.
1676 */
1677 int
udp_do_opt_set(conn_opt_arg_t * coa,int level,int name,uint_t inlen,uchar_t * invalp,cred_t * cr,boolean_t checkonly)1678 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1679 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1680 {
1681 conn_t *connp = coa->coa_connp;
1682 ip_xmit_attr_t *ixa = coa->coa_ixa;
1683 udp_t *udp = connp->conn_udp;
1684 udp_stack_t *us = udp->udp_us;
1685 int *i1 = (int *)invalp;
1686 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1687 int error;
1688
1689 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1690 /*
1691 * First do UDP specific sanity checks and handle UDP specific
1692 * options. Note that some IPPROTO_UDP options are handled
1693 * by conn_opt_set.
1694 */
1695 switch (level) {
1696 case SOL_SOCKET:
1697 switch (name) {
1698 case SO_SNDBUF:
1699 if (*i1 > us->us_max_buf) {
1700 return (ENOBUFS);
1701 }
1702 break;
1703 case SO_RCVBUF:
1704 if (*i1 > us->us_max_buf) {
1705 return (ENOBUFS);
1706 }
1707 break;
1708
1709 case SCM_UCRED: {
1710 struct ucred_s *ucr;
1711 cred_t *newcr;
1712 ts_label_t *tsl;
1713
1714 /*
1715 * Only sockets that have proper privileges and are
1716 * bound to MLPs will have any other value here, so
1717 * this implicitly tests for privilege to set label.
1718 */
1719 if (connp->conn_mlp_type == mlptSingle)
1720 break;
1721
1722 ucr = (struct ucred_s *)invalp;
1723 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1724 ucr->uc_labeloff < sizeof (*ucr) ||
1725 ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1726 return (EINVAL);
1727 if (!checkonly) {
1728 /*
1729 * Set ixa_tsl to the new label.
1730 * We assume that crgetzoneid doesn't change
1731 * as part of the SCM_UCRED.
1732 */
1733 ASSERT(cr != NULL);
1734 if ((tsl = crgetlabel(cr)) == NULL)
1735 return (EINVAL);
1736 newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1737 tsl->tsl_doi, KM_NOSLEEP);
1738 if (newcr == NULL)
1739 return (ENOSR);
1740 ASSERT(newcr->cr_label != NULL);
1741 /*
1742 * Move the hold on the cr_label to ixa_tsl by
1743 * setting cr_label to NULL. Then release newcr.
1744 */
1745 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1746 ixa->ixa_flags |= IXAF_UCRED_TSL;
1747 newcr->cr_label = NULL;
1748 crfree(newcr);
1749 coa->coa_changed |= COA_HEADER_CHANGED;
1750 coa->coa_changed |= COA_WROFF_CHANGED;
1751 }
1752 /* Fully handled this option. */
1753 return (0);
1754 }
1755 }
1756 break;
1757 case IPPROTO_UDP:
1758 switch (name) {
1759 case UDP_NAT_T_ENDPOINT:
1760 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1761 return (error);
1762 }
1763
1764 /*
1765 * Use conn_family instead so we can avoid ambiguitites
1766 * with AF_INET6 sockets that may switch from IPv4
1767 * to IPv6.
1768 */
1769 if (connp->conn_family != AF_INET) {
1770 return (EAFNOSUPPORT);
1771 }
1772
1773 if (!checkonly) {
1774 mutex_enter(&connp->conn_lock);
1775 udp->udp_nat_t_endpoint = onoff;
1776 mutex_exit(&connp->conn_lock);
1777 coa->coa_changed |= COA_HEADER_CHANGED;
1778 coa->coa_changed |= COA_WROFF_CHANGED;
1779 }
1780 /* Fully handled this option. */
1781 return (0);
1782 case UDP_RCVHDR:
1783 mutex_enter(&connp->conn_lock);
1784 udp->udp_rcvhdr = onoff;
1785 mutex_exit(&connp->conn_lock);
1786 return (0);
1787 }
1788 break;
1789 }
1790 error = conn_opt_set(coa, level, name, inlen, invalp,
1791 checkonly, cr);
1792 return (error);
1793 }
1794
1795 /*
1796 * This routine sets socket options.
1797 */
1798 int
udp_opt_set(conn_t * connp,uint_t optset_context,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)1799 udp_opt_set(conn_t *connp, uint_t optset_context, int level,
1800 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
1801 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
1802 {
1803 udp_t *udp = connp->conn_udp;
1804 int err;
1805 conn_opt_arg_t coas, *coa;
1806 boolean_t checkonly;
1807 udp_stack_t *us = udp->udp_us;
1808
1809 switch (optset_context) {
1810 case SETFN_OPTCOM_CHECKONLY:
1811 checkonly = B_TRUE;
1812 /*
1813 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1814 * inlen != 0 implies value supplied and
1815 * we have to "pretend" to set it.
1816 * inlen == 0 implies that there is no
1817 * value part in T_CHECK request and just validation
1818 * done elsewhere should be enough, we just return here.
1819 */
1820 if (inlen == 0) {
1821 *outlenp = 0;
1822 return (0);
1823 }
1824 break;
1825 case SETFN_OPTCOM_NEGOTIATE:
1826 checkonly = B_FALSE;
1827 break;
1828 case SETFN_UD_NEGOTIATE:
1829 case SETFN_CONN_NEGOTIATE:
1830 checkonly = B_FALSE;
1831 /*
1832 * Negotiating local and "association-related" options
1833 * through T_UNITDATA_REQ.
1834 *
1835 * Following routine can filter out ones we do not
1836 * want to be "set" this way.
1837 */
1838 if (!udp_opt_allow_udr_set(level, name)) {
1839 *outlenp = 0;
1840 return (EINVAL);
1841 }
1842 break;
1843 default:
1844 /*
1845 * We should never get here
1846 */
1847 *outlenp = 0;
1848 return (EINVAL);
1849 }
1850
1851 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
1852 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
1853
1854 if (thisdg_attrs != NULL) {
1855 /* Options from T_UNITDATA_REQ */
1856 coa = (conn_opt_arg_t *)thisdg_attrs;
1857 ASSERT(coa->coa_connp == connp);
1858 ASSERT(coa->coa_ixa != NULL);
1859 ASSERT(coa->coa_ipp != NULL);
1860 ASSERT(coa->coa_ancillary);
1861 } else {
1862 coa = &coas;
1863 coas.coa_connp = connp;
1864 /* Get a reference on conn_ixa to prevent concurrent mods */
1865 coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
1866 if (coas.coa_ixa == NULL) {
1867 *outlenp = 0;
1868 return (ENOMEM);
1869 }
1870 coas.coa_ipp = &connp->conn_xmit_ipp;
1871 coas.coa_ancillary = B_FALSE;
1872 coas.coa_changed = 0;
1873 }
1874
1875 err = udp_do_opt_set(coa, level, name, inlen, invalp,
1876 cr, checkonly);
1877 if (err != 0) {
1878 errout:
1879 if (!coa->coa_ancillary)
1880 ixa_refrele(coa->coa_ixa);
1881 *outlenp = 0;
1882 return (err);
1883 }
1884 /* Handle DHCPINIT here outside of lock */
1885 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
1886 uint_t ifindex;
1887 ill_t *ill;
1888
1889 ifindex = *(uint_t *)invalp;
1890 if (ifindex == 0) {
1891 ill = NULL;
1892 } else {
1893 ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
1894 coa->coa_ixa->ixa_ipst);
1895 if (ill == NULL) {
1896 err = ENXIO;
1897 goto errout;
1898 }
1899
1900 mutex_enter(&ill->ill_lock);
1901 if (ill->ill_state_flags & ILL_CONDEMNED) {
1902 mutex_exit(&ill->ill_lock);
1903 ill_refrele(ill);
1904 err = ENXIO;
1905 goto errout;
1906 }
1907 if (IS_VNI(ill)) {
1908 mutex_exit(&ill->ill_lock);
1909 ill_refrele(ill);
1910 err = EINVAL;
1911 goto errout;
1912 }
1913 }
1914 mutex_enter(&connp->conn_lock);
1915
1916 if (connp->conn_dhcpinit_ill != NULL) {
1917 /*
1918 * We've locked the conn so conn_cleanup_ill()
1919 * cannot clear conn_dhcpinit_ill -- so it's
1920 * safe to access the ill.
1921 */
1922 ill_t *oill = connp->conn_dhcpinit_ill;
1923
1924 ASSERT(oill->ill_dhcpinit != 0);
1925 atomic_dec_32(&oill->ill_dhcpinit);
1926 ill_set_inputfn(connp->conn_dhcpinit_ill);
1927 connp->conn_dhcpinit_ill = NULL;
1928 }
1929
1930 if (ill != NULL) {
1931 connp->conn_dhcpinit_ill = ill;
1932 atomic_inc_32(&ill->ill_dhcpinit);
1933 ill_set_inputfn(ill);
1934 mutex_exit(&connp->conn_lock);
1935 mutex_exit(&ill->ill_lock);
1936 ill_refrele(ill);
1937 } else {
1938 mutex_exit(&connp->conn_lock);
1939 }
1940 }
1941
1942 /*
1943 * Common case of OK return with outval same as inval.
1944 */
1945 if (invalp != outvalp) {
1946 /* don't trust bcopy for identical src/dst */
1947 (void) bcopy(invalp, outvalp, inlen);
1948 }
1949 *outlenp = inlen;
1950
1951 /*
1952 * If this was not ancillary data, then we rebuild the headers,
1953 * update the IRE/NCE, and IPsec as needed.
1954 * Since the label depends on the destination we go through
1955 * ip_set_destination first.
1956 */
1957 if (coa->coa_ancillary) {
1958 return (0);
1959 }
1960
1961 if (coa->coa_changed & COA_ROUTE_CHANGED) {
1962 in6_addr_t saddr, faddr, nexthop;
1963 in_port_t fport;
1964
1965 /*
1966 * We clear lastdst to make sure we pick up the change
1967 * next time sending.
1968 * If we are connected we re-cache the information.
1969 * We ignore errors to preserve BSD behavior.
1970 * Note that we don't redo IPsec policy lookup here
1971 * since the final destination (or source) didn't change.
1972 */
1973 mutex_enter(&connp->conn_lock);
1974 connp->conn_v6lastdst = ipv6_all_zeros;
1975
1976 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
1977 &connp->conn_faddr_v6, &nexthop);
1978 saddr = connp->conn_saddr_v6;
1979 faddr = connp->conn_faddr_v6;
1980 fport = connp->conn_fport;
1981 mutex_exit(&connp->conn_lock);
1982
1983 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
1984 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
1985 (void) ip_attr_connect(connp, coa->coa_ixa,
1986 &saddr, &faddr, &nexthop, fport, NULL, NULL,
1987 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
1988 }
1989 }
1990
1991 ixa_refrele(coa->coa_ixa);
1992
1993 if (coa->coa_changed & COA_HEADER_CHANGED) {
1994 /*
1995 * Rebuild the header template if we are connected.
1996 * Otherwise clear conn_v6lastdst so we rebuild the header
1997 * in the data path.
1998 */
1999 mutex_enter(&connp->conn_lock);
2000 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
2001 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
2002 err = udp_build_hdr_template(connp,
2003 &connp->conn_saddr_v6, &connp->conn_faddr_v6,
2004 connp->conn_fport, connp->conn_flowinfo);
2005 if (err != 0) {
2006 mutex_exit(&connp->conn_lock);
2007 return (err);
2008 }
2009 } else {
2010 connp->conn_v6lastdst = ipv6_all_zeros;
2011 }
2012 mutex_exit(&connp->conn_lock);
2013 }
2014 if (coa->coa_changed & COA_RCVBUF_CHANGED) {
2015 (void) proto_set_rx_hiwat(connp->conn_rq, connp,
2016 connp->conn_rcvbuf);
2017 }
2018 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
2019 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
2020 }
2021 if (coa->coa_changed & COA_WROFF_CHANGED) {
2022 /* Increase wroff if needed */
2023 uint_t wroff;
2024
2025 mutex_enter(&connp->conn_lock);
2026 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
2027 if (udp->udp_nat_t_endpoint)
2028 wroff += sizeof (uint32_t);
2029 if (wroff > connp->conn_wroff) {
2030 connp->conn_wroff = wroff;
2031 mutex_exit(&connp->conn_lock);
2032 (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
2033 } else {
2034 mutex_exit(&connp->conn_lock);
2035 }
2036 }
2037 return (err);
2038 }
2039
2040 /* This routine sets socket options. */
2041 int
udp_tpi_opt_set(queue_t * q,uint_t optset_context,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)2042 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
2043 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2044 void *thisdg_attrs, cred_t *cr)
2045 {
2046 conn_t *connp = Q_TO_CONN(q);
2047 int error;
2048
2049 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
2050 outlenp, outvalp, thisdg_attrs, cr);
2051 return (error);
2052 }
2053
2054 /*
2055 * Setup IP and UDP headers.
2056 * Returns NULL on allocation failure, in which case data_mp is freed.
2057 */
2058 mblk_t *
udp_prepend_hdr(conn_t * connp,ip_xmit_attr_t * ixa,const ip_pkt_t * ipp,const in6_addr_t * v6src,const in6_addr_t * v6dst,in_port_t dstport,uint32_t flowinfo,mblk_t * data_mp,int * errorp)2059 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2060 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
2061 uint32_t flowinfo, mblk_t *data_mp, int *errorp)
2062 {
2063 mblk_t *mp;
2064 udpha_t *udpha;
2065 udp_stack_t *us = connp->conn_netstack->netstack_udp;
2066 uint_t data_len;
2067 uint32_t cksum;
2068 udp_t *udp = connp->conn_udp;
2069 boolean_t insert_spi = udp->udp_nat_t_endpoint;
2070 uint_t ulp_hdr_len;
2071
2072 data_len = msgdsize(data_mp);
2073 ulp_hdr_len = UDPH_SIZE;
2074 if (insert_spi)
2075 ulp_hdr_len += sizeof (uint32_t);
2076
2077 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2078 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2079 if (mp == NULL) {
2080 ASSERT(*errorp != 0);
2081 return (NULL);
2082 }
2083
2084 data_len += ulp_hdr_len;
2085 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2086
2087 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2088 udpha->uha_src_port = connp->conn_lport;
2089 udpha->uha_dst_port = dstport;
2090 udpha->uha_checksum = 0;
2091 udpha->uha_length = htons(data_len);
2092
2093 /*
2094 * If there was a routing option/header then conn_prepend_hdr
2095 * has massaged it and placed the pseudo-header checksum difference
2096 * in the cksum argument.
2097 *
2098 * Setup header length and prepare for ULP checksum done in IP.
2099 *
2100 * We make it easy for IP to include our pseudo header
2101 * by putting our length in uha_checksum.
2102 * The IP source, destination, and length have already been set by
2103 * conn_prepend_hdr.
2104 */
2105 cksum += data_len;
2106 cksum = (cksum >> 16) + (cksum & 0xFFFF);
2107 ASSERT(cksum < 0x10000);
2108
2109 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2110 ipha_t *ipha = (ipha_t *)mp->b_rptr;
2111
2112 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2113
2114 /* IP does the checksum if uha_checksum is non-zero */
2115 if (us->us_do_checksum) {
2116 if (cksum == 0)
2117 udpha->uha_checksum = 0xffff;
2118 else
2119 udpha->uha_checksum = htons(cksum);
2120 } else {
2121 udpha->uha_checksum = 0;
2122 }
2123 } else {
2124 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2125
2126 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2127 if (cksum == 0)
2128 udpha->uha_checksum = 0xffff;
2129 else
2130 udpha->uha_checksum = htons(cksum);
2131 }
2132
2133 /* Insert all-0s SPI now. */
2134 if (insert_spi)
2135 *((uint32_t *)(udpha + 1)) = 0;
2136
2137 return (mp);
2138 }
2139
2140 static int
udp_build_hdr_template(conn_t * connp,const in6_addr_t * v6src,const in6_addr_t * v6dst,in_port_t dstport,uint32_t flowinfo)2141 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2142 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2143 {
2144 udpha_t *udpha;
2145 int error;
2146
2147 ASSERT(MUTEX_HELD(&connp->conn_lock));
2148 /*
2149 * We clear lastdst to make sure we don't use the lastdst path
2150 * next time sending since we might not have set v6dst yet.
2151 */
2152 connp->conn_v6lastdst = ipv6_all_zeros;
2153
2154 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2155 flowinfo);
2156 if (error != 0)
2157 return (error);
2158
2159 /*
2160 * Any routing header/option has been massaged. The checksum difference
2161 * is stored in conn_sum.
2162 */
2163 udpha = (udpha_t *)connp->conn_ht_ulp;
2164 udpha->uha_src_port = connp->conn_lport;
2165 udpha->uha_dst_port = dstport;
2166 udpha->uha_checksum = 0;
2167 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */
2168 return (0);
2169 }
2170
2171 static mblk_t *
udp_queue_fallback(udp_t * udp,mblk_t * mp)2172 udp_queue_fallback(udp_t *udp, mblk_t *mp)
2173 {
2174 ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2175 if (IPCL_IS_NONSTR(udp->udp_connp)) {
2176 /*
2177 * fallback has started but messages have not been moved yet
2178 */
2179 if (udp->udp_fallback_queue_head == NULL) {
2180 ASSERT(udp->udp_fallback_queue_tail == NULL);
2181 udp->udp_fallback_queue_head = mp;
2182 udp->udp_fallback_queue_tail = mp;
2183 } else {
2184 ASSERT(udp->udp_fallback_queue_tail != NULL);
2185 udp->udp_fallback_queue_tail->b_next = mp;
2186 udp->udp_fallback_queue_tail = mp;
2187 }
2188 return (NULL);
2189 } else {
2190 /*
2191 * Fallback completed, let the caller putnext() the mblk.
2192 */
2193 return (mp);
2194 }
2195 }
2196
2197 /*
2198 * Deliver data to ULP. In case we have a socket, and it's falling back to
2199 * TPI, then we'll queue the mp for later processing.
2200 */
2201 static void
udp_ulp_recv(conn_t * connp,mblk_t * mp,uint_t len,ip_recv_attr_t * ira)2202 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2203 {
2204 if (IPCL_IS_NONSTR(connp)) {
2205 udp_t *udp = connp->conn_udp;
2206 int error;
2207
2208 ASSERT(len == msgdsize(mp));
2209 if ((*connp->conn_upcalls->su_recv)
2210 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2211 mutex_enter(&udp->udp_recv_lock);
2212 if (error == ENOSPC) {
2213 /*
2214 * let's confirm while holding the lock
2215 */
2216 if ((*connp->conn_upcalls->su_recv)
2217 (connp->conn_upper_handle, NULL, 0, 0,
2218 &error, NULL) < 0) {
2219 ASSERT(error == ENOSPC);
2220 if (error == ENOSPC) {
2221 connp->conn_flow_cntrld =
2222 B_TRUE;
2223 }
2224 }
2225 mutex_exit(&udp->udp_recv_lock);
2226 } else {
2227 ASSERT(error == EOPNOTSUPP);
2228 mp = udp_queue_fallback(udp, mp);
2229 mutex_exit(&udp->udp_recv_lock);
2230 if (mp != NULL)
2231 putnext(connp->conn_rq, mp);
2232 }
2233 }
2234 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2235 } else {
2236 if (is_system_labeled()) {
2237 ASSERT(ira->ira_cred != NULL);
2238 /*
2239 * Provide for protocols above UDP such as RPC
2240 * NOPID leaves db_cpid unchanged.
2241 */
2242 mblk_setcred(mp, ira->ira_cred, NOPID);
2243 }
2244
2245 putnext(connp->conn_rq, mp);
2246 }
2247 }
2248
2249 /*
2250 * This is the inbound data path.
2251 * IP has already pulled up the IP plus UDP headers and verified alignment
2252 * etc.
2253 */
2254 /* ARGSUSED2 */
2255 static void
udp_input(void * arg1,mblk_t * mp,void * arg2,ip_recv_attr_t * ira)2256 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2257 {
2258 conn_t *connp = (conn_t *)arg1;
2259 struct T_unitdata_ind *tudi;
2260 uchar_t *rptr; /* Pointer to IP header */
2261 int hdr_length; /* Length of IP+UDP headers */
2262 int udi_size; /* Size of T_unitdata_ind */
2263 int pkt_len;
2264 udp_t *udp;
2265 udpha_t *udpha;
2266 ip_pkt_t ipps;
2267 ip6_t *ip6h;
2268 mblk_t *mp1;
2269 uint32_t udp_ipv4_options_len;
2270 crb_t recv_ancillary;
2271 udp_stack_t *us;
2272 conn_t *new = NULL;
2273
2274 ASSERT(connp->conn_flags & IPCL_UDPCONN);
2275
2276 mutex_enter(&connp->conn_lock);
2277 if (connp->conn_reuselist != NULL) {
2278 struct reuselist *reusep = connp->conn_reuselist;
2279 int i;
2280
2281 /*
2282 * we have to balance the request between multiple sockets.
2283 * Currently we do this in a round-robin fashion. In the
2284 * reuselist we maintain a pointer to the last receiver.
2285 * TODO: we can add a check if the conn is full and skip to
2286 * the next.
2287 */
2288 mutex_enter(&reusep->ru_lock);
2289 i = reusep->ru_next;
2290 new = reusep->ru_conns[i];
2291 if (++i == reusep->ru_entries)
2292 i = 0;
2293 reusep->ru_next = i;
2294 if (new == connp)
2295 new = NULL;
2296 else
2297 CONN_INC_REF(new);
2298 mutex_exit(&reusep->ru_lock);
2299 mutex_exit(&connp->conn_lock);
2300 if (new != NULL)
2301 connp = new;
2302 } else {
2303 mutex_exit(&connp->conn_lock);
2304 }
2305
2306 udp = connp->conn_udp;
2307 us = udp->udp_us;
2308 rptr = mp->b_rptr;
2309
2310 ASSERT(DB_TYPE(mp) == M_DATA);
2311 ASSERT(OK_32PTR(rptr));
2312 ASSERT(ira->ira_pktlen == msgdsize(mp));
2313 pkt_len = ira->ira_pktlen;
2314
2315 /*
2316 * Get a snapshot of these and allow other threads to change
2317 * them after that. We need the same recv_ancillary when determining
2318 * the size as when adding the ancillary data items.
2319 */
2320 mutex_enter(&connp->conn_lock);
2321 udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
2322 recv_ancillary = connp->conn_recv_ancillary;
2323 mutex_exit(&connp->conn_lock);
2324
2325 hdr_length = ira->ira_ip_hdr_length;
2326
2327 /*
2328 * IP inspected the UDP header thus all of it must be in the mblk.
2329 * UDP length check is performed for IPv6 packets and IPv4 packets
2330 * to check if the size of the packet as specified
2331 * by the UDP header is the same as the length derived from the IP
2332 * header.
2333 */
2334 udpha = (udpha_t *)(rptr + hdr_length);
2335 if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
2336 goto tossit;
2337
2338 hdr_length += UDPH_SIZE;
2339 ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */
2340
2341 /* Initialize regardless of IP version */
2342 ipps.ipp_fields = 0;
2343
2344 if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
2345 udp_ipv4_options_len > 0) &&
2346 connp->conn_family == AF_INET) {
2347 int err;
2348
2349 /*
2350 * Record/update udp_recv_ipp with the lock
2351 * held. Not needed for AF_INET6 sockets
2352 * since they don't support a getsockopt of IP_OPTIONS.
2353 */
2354 mutex_enter(&connp->conn_lock);
2355 err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
2356 B_TRUE);
2357 if (err != 0) {
2358 /* Allocation failed. Drop packet */
2359 mutex_exit(&connp->conn_lock);
2360 goto tossit;
2361 }
2362 mutex_exit(&connp->conn_lock);
2363 }
2364
2365 if (recv_ancillary.crb_all != 0) {
2366 /*
2367 * Record packet information in the ip_pkt_t
2368 */
2369 if (ira->ira_flags & IRAF_IS_IPV4) {
2370 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
2371 ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2372 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
2373 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
2374
2375 (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
2376 } else {
2377 uint8_t nexthdrp;
2378
2379 ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
2380 /*
2381 * IPv6 packets can only be received by applications
2382 * that are prepared to receive IPv6 addresses.
2383 * The IP fanout must ensure this.
2384 */
2385 ASSERT(connp->conn_family == AF_INET6);
2386
2387 ip6h = (ip6_t *)rptr;
2388
2389 /* We don't care about the length, but need the ipp */
2390 hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
2391 &nexthdrp);
2392 ASSERT(hdr_length == ira->ira_ip_hdr_length);
2393 /* Restore */
2394 hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
2395 ASSERT(nexthdrp == IPPROTO_UDP);
2396 }
2397 }
2398
2399 /*
2400 * This is the inbound data path. Packets are passed upstream as
2401 * T_UNITDATA_IND messages.
2402 */
2403 if (connp->conn_family == AF_INET) {
2404 sin_t *sin;
2405
2406 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
2407
2408 /*
2409 * Normally only send up the source address.
2410 * If any ancillary data items are wanted we add those.
2411 */
2412 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
2413 if (recv_ancillary.crb_all != 0) {
2414 udi_size += conn_recvancillary_size(connp,
2415 recv_ancillary, ira, mp, &ipps);
2416 }
2417
2418 /* Allocate a message block for the T_UNITDATA_IND structure. */
2419 mp1 = allocb(udi_size, BPRI_MED);
2420 if (mp1 == NULL)
2421 goto tossit;
2422 mp1->b_cont = mp;
2423 mp1->b_datap->db_type = M_PROTO;
2424 tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2425 mp1->b_wptr = (uchar_t *)tudi + udi_size;
2426 tudi->PRIM_type = T_UNITDATA_IND;
2427 tudi->SRC_length = sizeof (sin_t);
2428 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2429 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2430 sizeof (sin_t);
2431 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
2432 tudi->OPT_length = udi_size;
2433 sin = (sin_t *)&tudi[1];
2434 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
2435 sin->sin_port = udpha->uha_src_port;
2436 sin->sin_family = connp->conn_family;
2437 *(uint32_t *)&sin->sin_zero[0] = 0;
2438 *(uint32_t *)&sin->sin_zero[4] = 0;
2439
2440 /*
2441 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
2442 * IP_RECVTTL has been set.
2443 */
2444 if (udi_size != 0) {
2445 conn_recvancillary_add(connp, recv_ancillary, ira,
2446 &ipps, (uchar_t *)&sin[1], udi_size);
2447 }
2448 } else {
2449 sin6_t *sin6;
2450
2451 /*
2452 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
2453 *
2454 * Normally we only send up the address. If receiving of any
2455 * optional receive side information is enabled, we also send
2456 * that up as options.
2457 */
2458 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
2459
2460 if (recv_ancillary.crb_all != 0) {
2461 udi_size += conn_recvancillary_size(connp,
2462 recv_ancillary, ira, mp, &ipps);
2463 }
2464
2465 mp1 = allocb(udi_size, BPRI_MED);
2466 if (mp1 == NULL)
2467 goto tossit;
2468 mp1->b_cont = mp;
2469 mp1->b_datap->db_type = M_PROTO;
2470 tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2471 mp1->b_wptr = (uchar_t *)tudi + udi_size;
2472 tudi->PRIM_type = T_UNITDATA_IND;
2473 tudi->SRC_length = sizeof (sin6_t);
2474 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2475 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2476 sizeof (sin6_t);
2477 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
2478 tudi->OPT_length = udi_size;
2479 sin6 = (sin6_t *)&tudi[1];
2480 if (ira->ira_flags & IRAF_IS_IPV4) {
2481 in6_addr_t v6dst;
2482
2483 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
2484 &sin6->sin6_addr);
2485 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
2486 &v6dst);
2487 sin6->sin6_flowinfo = 0;
2488 sin6->sin6_scope_id = 0;
2489 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
2490 IPCL_ZONEID(connp), us->us_netstack);
2491 } else {
2492 ip6h = (ip6_t *)rptr;
2493
2494 sin6->sin6_addr = ip6h->ip6_src;
2495 /* No sin6_flowinfo per API */
2496 sin6->sin6_flowinfo = 0;
2497 /* For link-scope pass up scope id */
2498 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
2499 sin6->sin6_scope_id = ira->ira_ruifindex;
2500 else
2501 sin6->sin6_scope_id = 0;
2502 sin6->__sin6_src_id = ip_srcid_find_addr(
2503 &ip6h->ip6_dst, IPCL_ZONEID(connp),
2504 us->us_netstack);
2505 }
2506 sin6->sin6_port = udpha->uha_src_port;
2507 sin6->sin6_family = connp->conn_family;
2508
2509 if (udi_size != 0) {
2510 conn_recvancillary_add(connp, recv_ancillary, ira,
2511 &ipps, (uchar_t *)&sin6[1], udi_size);
2512 }
2513 }
2514
2515 /*
2516 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
2517 * loopback traffic).
2518 */
2519 DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
2520 void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);
2521
2522 /* Walk past the headers unless IP_RECVHDR was set. */
2523 if (!udp->udp_rcvhdr) {
2524 mp->b_rptr = rptr + hdr_length;
2525 pkt_len -= hdr_length;
2526 }
2527
2528 UDPS_BUMP_MIB(us, udpHCInDatagrams);
2529 udp_ulp_recv(connp, mp1, pkt_len, ira);
2530 if (new != NULL)
2531 CONN_DEC_REF(new);
2532 return;
2533
2534 tossit:
2535 freemsg(mp);
2536 UDPS_BUMP_MIB(us, udpInErrors);
2537 if (new != NULL)
2538 CONN_DEC_REF(new);
2539 }
2540
2541 /*
2542 * This routine creates a T_UDERROR_IND message and passes it upstream.
2543 * The address and options are copied from the T_UNITDATA_REQ message
2544 * passed in mp. This message is freed.
2545 */
2546 static void
udp_ud_err(queue_t * q,mblk_t * mp,t_scalar_t err)2547 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2548 {
2549 struct T_unitdata_req *tudr;
2550 mblk_t *mp1;
2551 uchar_t *destaddr;
2552 t_scalar_t destlen;
2553 uchar_t *optaddr;
2554 t_scalar_t optlen;
2555
2556 if ((mp->b_wptr < mp->b_rptr) ||
2557 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2558 goto done;
2559 }
2560 tudr = (struct T_unitdata_req *)mp->b_rptr;
2561 destaddr = mp->b_rptr + tudr->DEST_offset;
2562 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2563 destaddr + tudr->DEST_length < mp->b_rptr ||
2564 destaddr + tudr->DEST_length > mp->b_wptr) {
2565 goto done;
2566 }
2567 optaddr = mp->b_rptr + tudr->OPT_offset;
2568 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2569 optaddr + tudr->OPT_length < mp->b_rptr ||
2570 optaddr + tudr->OPT_length > mp->b_wptr) {
2571 goto done;
2572 }
2573 destlen = tudr->DEST_length;
2574 optlen = tudr->OPT_length;
2575
2576 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2577 (char *)optaddr, optlen, err);
2578 if (mp1 != NULL)
2579 qreply(q, mp1);
2580
2581 done:
2582 freemsg(mp);
2583 }
2584
2585 /*
2586 * This routine removes a port number association from a stream. It
2587 * is called by udp_wput to handle T_UNBIND_REQ messages.
2588 */
2589 static void
udp_tpi_unbind(queue_t * q,mblk_t * mp)2590 udp_tpi_unbind(queue_t *q, mblk_t *mp)
2591 {
2592 conn_t *connp = Q_TO_CONN(q);
2593 int error;
2594
2595 error = udp_do_unbind(connp);
2596 if (error) {
2597 if (error < 0)
2598 udp_err_ack(q, mp, -error, 0);
2599 else
2600 udp_err_ack(q, mp, TSYSERR, error);
2601 return;
2602 }
2603
2604 mp = mi_tpi_ok_ack_alloc(mp);
2605 ASSERT(mp != NULL);
2606 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2607 qreply(q, mp);
2608 }
2609
2610 /*
2611 * Don't let port fall into the privileged range.
2612 * Since the extra privileged ports can be arbitrary we also
2613 * ensure that we exclude those from consideration.
2614 * us->us_epriv_ports is not sorted thus we loop over it until
2615 * there are no changes.
2616 */
2617 static in_port_t
udp_update_next_port(udp_t * udp,in_port_t port,boolean_t random)2618 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2619 {
2620 int i, bump;
2621 in_port_t nextport;
2622 boolean_t restart = B_FALSE;
2623 udp_stack_t *us = udp->udp_us;
2624
2625 if (random && udp_random_anon_port != 0) {
2626 (void) random_get_pseudo_bytes((uint8_t *)&port,
2627 sizeof (in_port_t));
2628 /*
2629 * Unless changed by a sys admin, the smallest anon port
2630 * is 32768 and the largest anon port is 65535. It is
2631 * very likely (50%) for the random port to be smaller
2632 * than the smallest anon port. When that happens,
2633 * add port % (anon port range) to the smallest anon
2634 * port to get the random port. It should fall into the
2635 * valid anon port range.
2636 */
2637 if ((port < us->us_smallest_anon_port) ||
2638 (port > us->us_largest_anon_port)) {
2639 if (us->us_smallest_anon_port ==
2640 us->us_largest_anon_port) {
2641 bump = 0;
2642 } else {
2643 bump = port % (us->us_largest_anon_port -
2644 us->us_smallest_anon_port);
2645 }
2646
2647 port = us->us_smallest_anon_port + bump;
2648 }
2649 }
2650
2651 retry:
2652 if (port < us->us_smallest_anon_port)
2653 port = us->us_smallest_anon_port;
2654
2655 if (port > us->us_largest_anon_port) {
2656 port = us->us_smallest_anon_port;
2657 if (restart)
2658 return (0);
2659 restart = B_TRUE;
2660 }
2661
2662 if (port < us->us_smallest_nonpriv_port)
2663 port = us->us_smallest_nonpriv_port;
2664
2665 for (i = 0; i < us->us_num_epriv_ports; i++) {
2666 if (port == us->us_epriv_ports[i]) {
2667 port++;
2668 /*
2669 * Make sure that the port is in the
2670 * valid range.
2671 */
2672 goto retry;
2673 }
2674 }
2675
2676 if (is_system_labeled() &&
2677 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2678 port, IPPROTO_UDP, B_TRUE)) != 0) {
2679 port = nextport;
2680 goto retry;
2681 }
2682
2683 return (port);
2684 }
2685
2686 /*
2687 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
2688 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2689 * the TPI options, otherwise we take them from msg_control.
2690 * If both sin and sin6 is set it is a connected socket and we use conn_faddr.
2691 * Always consumes mp; never consumes tudr_mp.
2692 */
2693 static int
udp_output_ancillary(conn_t * connp,sin_t * sin,sin6_t * sin6,mblk_t * mp,mblk_t * tudr_mp,struct nmsghdr * msg,cred_t * cr,pid_t pid)2694 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2695 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2696 {
2697 udp_t *udp = connp->conn_udp;
2698 udp_stack_t *us = udp->udp_us;
2699 int error;
2700 ip_xmit_attr_t *ixa;
2701 ip_pkt_t *ipp;
2702 in6_addr_t v6src;
2703 in6_addr_t v6dst;
2704 in6_addr_t v6nexthop;
2705 in_port_t dstport;
2706 uint32_t flowinfo;
2707 uint_t srcid;
2708 int is_absreq_failure = 0;
2709 conn_opt_arg_t coas, *coa;
2710
2711 ASSERT(tudr_mp != NULL || msg != NULL);
2712
2713 /*
2714 * Get ixa before checking state to handle a disconnect race.
2715 *
2716 * We need an exclusive copy of conn_ixa since the ancillary data
2717 * options might modify it. That copy has no pointers hence we
2718 * need to set them up once we've parsed the ancillary data.
2719 */
2720 ixa = conn_get_ixa_exclusive(connp);
2721 if (ixa == NULL) {
2722 UDPS_BUMP_MIB(us, udpOutErrors);
2723 freemsg(mp);
2724 return (ENOMEM);
2725 }
2726 ASSERT(cr != NULL);
2727 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2728 ixa->ixa_cred = cr;
2729 ixa->ixa_cpid = pid;
2730 if (is_system_labeled()) {
2731 /* We need to restart with a label based on the cred */
2732 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2733 }
2734
2735 /* In case previous destination was multicast or multirt */
2736 ip_attr_newdst(ixa);
2737
2738 /* Get a copy of conn_xmit_ipp since the options might change it */
2739 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2740 if (ipp == NULL) {
2741 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2742 ixa->ixa_cred = connp->conn_cred; /* Restore */
2743 ixa->ixa_cpid = connp->conn_cpid;
2744 ixa_refrele(ixa);
2745 UDPS_BUMP_MIB(us, udpOutErrors);
2746 freemsg(mp);
2747 return (ENOMEM);
2748 }
2749 mutex_enter(&connp->conn_lock);
2750 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2751 mutex_exit(&connp->conn_lock);
2752 if (error != 0) {
2753 UDPS_BUMP_MIB(us, udpOutErrors);
2754 freemsg(mp);
2755 goto done;
2756 }
2757
2758 /*
2759 * Parse the options and update ixa and ipp as a result.
2760 * Note that ixa_tsl can be updated if SCM_UCRED.
2761 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2762 */
2763
2764 coa = &coas;
2765 coa->coa_connp = connp;
2766 coa->coa_ixa = ixa;
2767 coa->coa_ipp = ipp;
2768 coa->coa_ancillary = B_TRUE;
2769 coa->coa_changed = 0;
2770
2771 if (msg != NULL) {
2772 error = process_auxiliary_options(connp, msg->msg_control,
2773 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2774 } else {
2775 struct T_unitdata_req *tudr;
2776
2777 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2778 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2779 error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2780 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2781 coa, &is_absreq_failure);
2782 }
2783 if (error != 0) {
2784 /*
2785 * Note: No special action needed in this
2786 * module for "is_absreq_failure"
2787 */
2788 freemsg(mp);
2789 UDPS_BUMP_MIB(us, udpOutErrors);
2790 goto done;
2791 }
2792 ASSERT(is_absreq_failure == 0);
2793
2794 mutex_enter(&connp->conn_lock);
2795 /*
2796 * If laddr is unspecified then we look at sin6_src_id.
2797 * We will give precedence to a source address set with IPV6_PKTINFO
2798 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2799 * want ip_attr_connect to select a source (since it can fail) when
2800 * IPV6_PKTINFO is specified.
2801 * If this doesn't result in a source address then we get a source
2802 * from ip_attr_connect() below.
2803 */
2804 v6src = connp->conn_saddr_v6;
2805 if (sin != NULL) {
2806 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
2807 dstport = sin->sin_port;
2808 flowinfo = 0;
2809 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2810 ixa->ixa_flags |= IXAF_IS_IPV4;
2811 } else if (sin6 != NULL) {
2812 boolean_t v4mapped;
2813
2814 v6dst = sin6->sin6_addr;
2815 dstport = sin6->sin6_port;
2816 flowinfo = sin6->sin6_flowinfo;
2817 srcid = sin6->__sin6_src_id;
2818 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
2819 ixa->ixa_scopeid = sin6->sin6_scope_id;
2820 ixa->ixa_flags |= IXAF_SCOPEID_SET;
2821 } else {
2822 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2823 }
2824 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
2825 if (v4mapped)
2826 ixa->ixa_flags |= IXAF_IS_IPV4;
2827 else
2828 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2829 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
2830 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
2831 v4mapped, connp->conn_netstack)) {
2832 /* Mismatch - v4mapped/v6 specified by srcid. */
2833 mutex_exit(&connp->conn_lock);
2834 error = EADDRNOTAVAIL;
2835 goto failed; /* Does freemsg() and mib. */
2836 }
2837 }
2838 } else {
2839 /* Connected case */
2840 v6dst = connp->conn_faddr_v6;
2841 dstport = connp->conn_fport;
2842 flowinfo = connp->conn_flowinfo;
2843 }
2844 mutex_exit(&connp->conn_lock);
2845
2846 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
2847 if (ipp->ipp_fields & IPPF_ADDR) {
2848 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2849 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2850 v6src = ipp->ipp_addr;
2851 } else {
2852 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2853 v6src = ipp->ipp_addr;
2854 }
2855 }
2856
2857 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
2858 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
2859 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
2860
2861 switch (error) {
2862 case 0:
2863 break;
2864 case EADDRNOTAVAIL:
2865 /*
2866 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2867 * Don't have the application see that errno
2868 */
2869 error = ENETUNREACH;
2870 goto failed;
2871 case ENETDOWN:
2872 /*
2873 * Have !ipif_addr_ready address; drop packet silently
2874 * until we can get applications to not send until we
2875 * are ready.
2876 */
2877 error = 0;
2878 goto failed;
2879 case EHOSTUNREACH:
2880 case ENETUNREACH:
2881 if (ixa->ixa_ire != NULL) {
2882 /*
2883 * Let conn_ip_output/ire_send_noroute return
2884 * the error and send any local ICMP error.
2885 */
2886 error = 0;
2887 break;
2888 }
2889 /* FALLTHRU */
2890 default:
2891 failed:
2892 freemsg(mp);
2893 UDPS_BUMP_MIB(us, udpOutErrors);
2894 goto done;
2895 }
2896
2897 /*
2898 * We might be going to a different destination than last time,
2899 * thus check that TX allows the communication and compute any
2900 * needed label.
2901 *
2902 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
2903 * don't have to worry about concurrent threads.
2904 */
2905 if (is_system_labeled()) {
2906 /* Using UDP MLP requires SCM_UCRED from user */
2907 if (connp->conn_mlp_type != mlptSingle &&
2908 !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
2909 UDPS_BUMP_MIB(us, udpOutErrors);
2910 error = ECONNREFUSED;
2911 freemsg(mp);
2912 goto done;
2913 }
2914 /*
2915 * Check whether Trusted Solaris policy allows communication
2916 * with this host, and pretend that the destination is
2917 * unreachable if not.
2918 * Compute any needed label and place it in ipp_label_v4/v6.
2919 *
2920 * Later conn_build_hdr_template/conn_prepend_hdr takes
2921 * ipp_label_v4/v6 to form the packet.
2922 *
2923 * Tsol note: We have ipp structure local to this thread so
2924 * no locking is needed.
2925 */
2926 error = conn_update_label(connp, ixa, &v6dst, ipp);
2927 if (error != 0) {
2928 freemsg(mp);
2929 UDPS_BUMP_MIB(us, udpOutErrors);
2930 goto done;
2931 }
2932 }
2933 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
2934 flowinfo, mp, &error);
2935 if (mp == NULL) {
2936 ASSERT(error != 0);
2937 UDPS_BUMP_MIB(us, udpOutErrors);
2938 goto done;
2939 }
2940 if (ixa->ixa_pktlen > IP_MAXPACKET) {
2941 error = EMSGSIZE;
2942 UDPS_BUMP_MIB(us, udpOutErrors);
2943 freemsg(mp);
2944 goto done;
2945 }
2946 /* We're done. Pass the packet to ip. */
2947 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2948
2949 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
2950 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
2951 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
2952
2953 error = conn_ip_output(mp, ixa);
2954 /* No udpOutErrors if an error since IP increases its error counter */
2955 switch (error) {
2956 case 0:
2957 break;
2958 case EWOULDBLOCK:
2959 (void) ixa_check_drain_insert(connp, ixa);
2960 error = 0;
2961 break;
2962 case EADDRNOTAVAIL:
2963 /*
2964 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2965 * Don't have the application see that errno
2966 */
2967 error = ENETUNREACH;
2968 /* FALLTHRU */
2969 default:
2970 mutex_enter(&connp->conn_lock);
2971 /*
2972 * Clear the source and v6lastdst so we call ip_attr_connect
2973 * for the next packet and try to pick a better source.
2974 */
2975 if (connp->conn_mcbc_bind)
2976 connp->conn_saddr_v6 = ipv6_all_zeros;
2977 else
2978 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
2979 connp->conn_v6lastdst = ipv6_all_zeros;
2980 mutex_exit(&connp->conn_lock);
2981 break;
2982 }
2983 done:
2984 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2985 ixa->ixa_cred = connp->conn_cred; /* Restore */
2986 ixa->ixa_cpid = connp->conn_cpid;
2987 ixa_refrele(ixa);
2988 ip_pkt_free(ipp);
2989 kmem_free(ipp, sizeof (*ipp));
2990 return (error);
2991 }
2992
2993 /*
2994 * Handle sending an M_DATA for a connected socket.
2995 * Handles both IPv4 and IPv6.
2996 */
2997 static int
udp_output_connected(conn_t * connp,mblk_t * mp,cred_t * cr,pid_t pid)2998 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
2999 {
3000 udp_t *udp = connp->conn_udp;
3001 udp_stack_t *us = udp->udp_us;
3002 int error;
3003 ip_xmit_attr_t *ixa;
3004
3005 /*
3006 * If no other thread is using conn_ixa this just gets a reference to
3007 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
3008 */
3009 ixa = conn_get_ixa(connp, B_FALSE);
3010 if (ixa == NULL) {
3011 UDPS_BUMP_MIB(us, udpOutErrors);
3012 freemsg(mp);
3013 return (ENOMEM);
3014 }
3015
3016 ASSERT(cr != NULL);
3017 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3018 ixa->ixa_cred = cr;
3019 ixa->ixa_cpid = pid;
3020
3021 mutex_enter(&connp->conn_lock);
3022 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
3023 connp->conn_fport, connp->conn_flowinfo, &error);
3024
3025 if (mp == NULL) {
3026 ASSERT(error != 0);
3027 mutex_exit(&connp->conn_lock);
3028 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3029 ixa->ixa_cred = connp->conn_cred; /* Restore */
3030 ixa->ixa_cpid = connp->conn_cpid;
3031 ixa_refrele(ixa);
3032 UDPS_BUMP_MIB(us, udpOutErrors);
3033 freemsg(mp);
3034 return (error);
3035 }
3036
3037 /*
3038 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3039 * safe copy, then we need to fill in any pointers in it.
3040 */
3041 if (ixa->ixa_ire == NULL) {
3042 in6_addr_t faddr, saddr;
3043 in6_addr_t nexthop;
3044 in_port_t fport;
3045
3046 saddr = connp->conn_saddr_v6;
3047 faddr = connp->conn_faddr_v6;
3048 fport = connp->conn_fport;
3049 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
3050 mutex_exit(&connp->conn_lock);
3051
3052 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
3053 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
3054 IPDF_IPSEC);
3055 switch (error) {
3056 case 0:
3057 break;
3058 case EADDRNOTAVAIL:
3059 /*
3060 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3061 * Don't have the application see that errno
3062 */
3063 error = ENETUNREACH;
3064 goto failed;
3065 case ENETDOWN:
3066 /*
3067 * Have !ipif_addr_ready address; drop packet silently
3068 * until we can get applications to not send until we
3069 * are ready.
3070 */
3071 error = 0;
3072 goto failed;
3073 case EHOSTUNREACH:
3074 case ENETUNREACH:
3075 if (ixa->ixa_ire != NULL) {
3076 /*
3077 * Let conn_ip_output/ire_send_noroute return
3078 * the error and send any local ICMP error.
3079 */
3080 error = 0;
3081 break;
3082 }
3083 /* FALLTHRU */
3084 default:
3085 failed:
3086 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3087 ixa->ixa_cred = connp->conn_cred; /* Restore */
3088 ixa->ixa_cpid = connp->conn_cpid;
3089 ixa_refrele(ixa);
3090 freemsg(mp);
3091 UDPS_BUMP_MIB(us, udpOutErrors);
3092 return (error);
3093 }
3094 } else {
3095 /* Done with conn_t */
3096 mutex_exit(&connp->conn_lock);
3097 }
3098 ASSERT(ixa->ixa_ire != NULL);
3099
3100 /* We're done. Pass the packet to ip. */
3101 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3102
3103 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3104 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3105 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3106
3107 error = conn_ip_output(mp, ixa);
3108 /* No udpOutErrors if an error since IP increases its error counter */
3109 switch (error) {
3110 case 0:
3111 break;
3112 case EWOULDBLOCK:
3113 (void) ixa_check_drain_insert(connp, ixa);
3114 error = 0;
3115 break;
3116 case EADDRNOTAVAIL:
3117 /*
3118 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3119 * Don't have the application see that errno
3120 */
3121 error = ENETUNREACH;
3122 break;
3123 }
3124 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3125 ixa->ixa_cred = connp->conn_cred; /* Restore */
3126 ixa->ixa_cpid = connp->conn_cpid;
3127 ixa_refrele(ixa);
3128 return (error);
3129 }
3130
3131 /*
3132 * Handle sending an M_DATA to the last destination.
3133 * Handles both IPv4 and IPv6.
3134 *
3135 * NOTE: The caller must hold conn_lock and we drop it here.
3136 */
3137 static int
udp_output_lastdst(conn_t * connp,mblk_t * mp,cred_t * cr,pid_t pid,ip_xmit_attr_t * ixa)3138 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3139 ip_xmit_attr_t *ixa)
3140 {
3141 udp_t *udp = connp->conn_udp;
3142 udp_stack_t *us = udp->udp_us;
3143 int error;
3144
3145 ASSERT(MUTEX_HELD(&connp->conn_lock));
3146 ASSERT(ixa != NULL);
3147
3148 ASSERT(cr != NULL);
3149 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3150 ixa->ixa_cred = cr;
3151 ixa->ixa_cpid = pid;
3152
3153 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3154 connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3155
3156 if (mp == NULL) {
3157 ASSERT(error != 0);
3158 mutex_exit(&connp->conn_lock);
3159 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3160 ixa->ixa_cred = connp->conn_cred; /* Restore */
3161 ixa->ixa_cpid = connp->conn_cpid;
3162 ixa_refrele(ixa);
3163 UDPS_BUMP_MIB(us, udpOutErrors);
3164 freemsg(mp);
3165 return (error);
3166 }
3167
3168 /*
3169 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3170 * safe copy, then we need to fill in any pointers in it.
3171 */
3172 if (ixa->ixa_ire == NULL) {
3173 in6_addr_t lastdst, lastsrc;
3174 in6_addr_t nexthop;
3175 in_port_t lastport;
3176
3177 lastsrc = connp->conn_v6lastsrc;
3178 lastdst = connp->conn_v6lastdst;
3179 lastport = connp->conn_lastdstport;
3180 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3181 mutex_exit(&connp->conn_lock);
3182
3183 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3184 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3185 IPDF_VERIFY_DST | IPDF_IPSEC);
3186 switch (error) {
3187 case 0:
3188 break;
3189 case EADDRNOTAVAIL:
3190 /*
3191 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3192 * Don't have the application see that errno
3193 */
3194 error = ENETUNREACH;
3195 goto failed;
3196 case ENETDOWN:
3197 /*
3198 * Have !ipif_addr_ready address; drop packet silently
3199 * until we can get applications to not send until we
3200 * are ready.
3201 */
3202 error = 0;
3203 goto failed;
3204 case EHOSTUNREACH:
3205 case ENETUNREACH:
3206 if (ixa->ixa_ire != NULL) {
3207 /*
3208 * Let conn_ip_output/ire_send_noroute return
3209 * the error and send any local ICMP error.
3210 */
3211 error = 0;
3212 break;
3213 }
3214 /* FALLTHRU */
3215 default:
3216 failed:
3217 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3218 ixa->ixa_cred = connp->conn_cred; /* Restore */
3219 ixa->ixa_cpid = connp->conn_cpid;
3220 ixa_refrele(ixa);
3221 freemsg(mp);
3222 UDPS_BUMP_MIB(us, udpOutErrors);
3223 return (error);
3224 }
3225 } else {
3226 /* Done with conn_t */
3227 mutex_exit(&connp->conn_lock);
3228 }
3229
3230 /* We're done. Pass the packet to ip. */
3231 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3232
3233 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3234 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3235 &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3236
3237 error = conn_ip_output(mp, ixa);
3238 /* No udpOutErrors if an error since IP increases its error counter */
3239 switch (error) {
3240 case 0:
3241 break;
3242 case EWOULDBLOCK:
3243 (void) ixa_check_drain_insert(connp, ixa);
3244 error = 0;
3245 break;
3246 case EADDRNOTAVAIL:
3247 /*
3248 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3249 * Don't have the application see that errno
3250 */
3251 error = ENETUNREACH;
3252 /* FALLTHRU */
3253 default:
3254 mutex_enter(&connp->conn_lock);
3255 /*
3256 * Clear the source and v6lastdst so we call ip_attr_connect
3257 * for the next packet and try to pick a better source.
3258 */
3259 if (connp->conn_mcbc_bind)
3260 connp->conn_saddr_v6 = ipv6_all_zeros;
3261 else
3262 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3263 connp->conn_v6lastdst = ipv6_all_zeros;
3264 mutex_exit(&connp->conn_lock);
3265 break;
3266 }
3267 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3268 ixa->ixa_cred = connp->conn_cred; /* Restore */
3269 ixa->ixa_cpid = connp->conn_cpid;
3270 ixa_refrele(ixa);
3271 return (error);
3272 }
3273
3274
3275 /*
3276 * Prepend the header template and then fill in the source and
3277 * flowinfo. The caller needs to handle the destination address since
3278 * it's setting is different if rthdr or source route.
3279 *
3280 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET.
3281 * When it returns NULL it sets errorp.
3282 */
3283 static mblk_t *
udp_prepend_header_template(conn_t * connp,ip_xmit_attr_t * ixa,mblk_t * mp,const in6_addr_t * v6src,in_port_t dstport,uint32_t flowinfo,int * errorp)3284 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3285 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3286 {
3287 udp_t *udp = connp->conn_udp;
3288 udp_stack_t *us = udp->udp_us;
3289 boolean_t insert_spi = udp->udp_nat_t_endpoint;
3290 uint_t pktlen;
3291 uint_t alloclen;
3292 uint_t copylen;
3293 uint8_t *iph;
3294 uint_t ip_hdr_length;
3295 udpha_t *udpha;
3296 uint32_t cksum;
3297 ip_pkt_t *ipp;
3298
3299 ASSERT(MUTEX_HELD(&connp->conn_lock));
3300
3301 /*
3302 * Copy the header template and leave space for an SPI
3303 */
3304 copylen = connp->conn_ht_iphc_len;
3305 alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3306 pktlen = alloclen + msgdsize(mp);
3307 if (pktlen > IP_MAXPACKET) {
3308 freemsg(mp);
3309 *errorp = EMSGSIZE;
3310 return (NULL);
3311 }
3312 ixa->ixa_pktlen = pktlen;
3313
3314 /* check/fix buffer config, setup pointers into it */
3315 iph = mp->b_rptr - alloclen;
3316 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3317 mblk_t *mp1;
3318
3319 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3320 if (mp1 == NULL) {
3321 freemsg(mp);
3322 *errorp = ENOMEM;
3323 return (NULL);
3324 }
3325 mp1->b_wptr = DB_LIM(mp1);
3326 mp1->b_cont = mp;
3327 mp = mp1;
3328 iph = (mp->b_wptr - alloclen);
3329 }
3330 mp->b_rptr = iph;
3331 bcopy(connp->conn_ht_iphc, iph, copylen);
3332 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3333
3334 ixa->ixa_ip_hdr_length = ip_hdr_length;
3335 udpha = (udpha_t *)(iph + ip_hdr_length);
3336
3337 /*
3338 * Setup header length and prepare for ULP checksum done in IP.
3339 * udp_build_hdr_template has already massaged any routing header
3340 * and placed the result in conn_sum.
3341 *
3342 * We make it easy for IP to include our pseudo header
3343 * by putting our length in uha_checksum.
3344 */
3345 cksum = pktlen - ip_hdr_length;
3346 udpha->uha_length = htons(cksum);
3347
3348 cksum += connp->conn_sum;
3349 cksum = (cksum >> 16) + (cksum & 0xFFFF);
3350 ASSERT(cksum < 0x10000);
3351
3352 ipp = &connp->conn_xmit_ipp;
3353 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3354 ipha_t *ipha = (ipha_t *)iph;
3355
3356 ipha->ipha_length = htons((uint16_t)pktlen);
3357
3358 /* IP does the checksum if uha_checksum is non-zero */
3359 if (us->us_do_checksum)
3360 udpha->uha_checksum = htons(cksum);
3361
3362 /* if IP_PKTINFO specified an addres it wins over bind() */
3363 if ((ipp->ipp_fields & IPPF_ADDR) &&
3364 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3365 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3366 ipha->ipha_src = ipp->ipp_addr_v4;
3367 } else {
3368 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3369 }
3370 } else {
3371 ip6_t *ip6h = (ip6_t *)iph;
3372
3373 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3374 udpha->uha_checksum = htons(cksum);
3375
3376 /* if IP_PKTINFO specified an addres it wins over bind() */
3377 if ((ipp->ipp_fields & IPPF_ADDR) &&
3378 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3379 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3380 ip6h->ip6_src = ipp->ipp_addr;
3381 } else {
3382 ip6h->ip6_src = *v6src;
3383 }
3384 ip6h->ip6_vcf =
3385 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3386 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3387 if (ipp->ipp_fields & IPPF_TCLASS) {
3388 /* Overrides the class part of flowinfo */
3389 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3390 ipp->ipp_tclass);
3391 }
3392 }
3393
3394 /* Insert all-0s SPI now. */
3395 if (insert_spi)
3396 *((uint32_t *)(udpha + 1)) = 0;
3397
3398 udpha->uha_dst_port = dstport;
3399 return (mp);
3400 }
3401
3402 /*
3403 * Send a T_UDERR_IND in response to an M_DATA
3404 */
3405 static void
udp_ud_err_connected(conn_t * connp,t_scalar_t error)3406 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3407 {
3408 struct sockaddr_storage ss;
3409 sin_t *sin;
3410 sin6_t *sin6;
3411 struct sockaddr *addr;
3412 socklen_t addrlen;
3413 mblk_t *mp1;
3414
3415 mutex_enter(&connp->conn_lock);
3416 /* Initialize addr and addrlen as if they're passed in */
3417 if (connp->conn_family == AF_INET) {
3418 sin = (sin_t *)&ss;
3419 *sin = sin_null;
3420 sin->sin_family = AF_INET;
3421 sin->sin_port = connp->conn_fport;
3422 sin->sin_addr.s_addr = connp->conn_faddr_v4;
3423 addr = (struct sockaddr *)sin;
3424 addrlen = sizeof (*sin);
3425 } else {
3426 sin6 = (sin6_t *)&ss;
3427 *sin6 = sin6_null;
3428 sin6->sin6_family = AF_INET6;
3429 sin6->sin6_port = connp->conn_fport;
3430 sin6->sin6_flowinfo = connp->conn_flowinfo;
3431 sin6->sin6_addr = connp->conn_faddr_v6;
3432 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3433 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3434 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3435 } else {
3436 sin6->sin6_scope_id = 0;
3437 }
3438 sin6->__sin6_src_id = 0;
3439 addr = (struct sockaddr *)sin6;
3440 addrlen = sizeof (*sin6);
3441 }
3442 mutex_exit(&connp->conn_lock);
3443
3444 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3445 if (mp1 != NULL)
3446 putnext(connp->conn_rq, mp1);
3447 }
3448
3449 /*
3450 * This routine handles all messages passed downstream. It either
3451 * consumes the message or passes it downstream; it never queues a
3452 * a message.
3453 *
3454 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode
3455 * is valid when we are directly beneath the stream head, and thus sockfs
3456 * is able to bypass STREAMS and directly call us, passing along the sockaddr
3457 * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3458 * connected endpoints.
3459 */
3460 void
udp_wput(queue_t * q,mblk_t * mp)3461 udp_wput(queue_t *q, mblk_t *mp)
3462 {
3463 sin6_t *sin6;
3464 sin_t *sin = NULL;
3465 uint_t srcid;
3466 conn_t *connp = Q_TO_CONN(q);
3467 udp_t *udp = connp->conn_udp;
3468 int error = 0;
3469 struct sockaddr *addr = NULL;
3470 socklen_t addrlen;
3471 udp_stack_t *us = udp->udp_us;
3472 struct T_unitdata_req *tudr;
3473 mblk_t *data_mp;
3474 ushort_t ipversion;
3475 cred_t *cr;
3476 pid_t pid;
3477
3478 /*
3479 * We directly handle several cases here: T_UNITDATA_REQ message
3480 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3481 * socket.
3482 */
3483 switch (DB_TYPE(mp)) {
3484 case M_DATA:
3485 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3486 /* Not connected; address is required */
3487 UDPS_BUMP_MIB(us, udpOutErrors);
3488 UDP_DBGSTAT(us, udp_data_notconn);
3489 UDP_STAT(us, udp_out_err_notconn);
3490 freemsg(mp);
3491 return;
3492 }
3493 /*
3494 * All Solaris components should pass a db_credp
3495 * for this message, hence we ASSERT.
3496 * On production kernels we return an error to be robust against
3497 * random streams modules sitting on top of us.
3498 */
3499 cr = msg_getcred(mp, &pid);
3500 ASSERT(cr != NULL);
3501 if (cr == NULL) {
3502 UDPS_BUMP_MIB(us, udpOutErrors);
3503 freemsg(mp);
3504 return;
3505 }
3506 ASSERT(udp->udp_issocket);
3507 UDP_DBGSTAT(us, udp_data_conn);
3508 error = udp_output_connected(connp, mp, cr, pid);
3509 if (error != 0) {
3510 UDP_STAT(us, udp_out_err_output);
3511 if (connp->conn_rq != NULL)
3512 udp_ud_err_connected(connp, (t_scalar_t)error);
3513 #ifdef DEBUG
3514 printf("udp_output_connected returned %d\n", error);
3515 #endif
3516 }
3517 return;
3518
3519 case M_PROTO:
3520 case M_PCPROTO:
3521 tudr = (struct T_unitdata_req *)mp->b_rptr;
3522 if (MBLKL(mp) < sizeof (*tudr) ||
3523 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3524 udp_wput_other(q, mp);
3525 return;
3526 }
3527 break;
3528
3529 default:
3530 udp_wput_other(q, mp);
3531 return;
3532 }
3533
3534 /* Handle valid T_UNITDATA_REQ here */
3535 data_mp = mp->b_cont;
3536 if (data_mp == NULL) {
3537 error = EPROTO;
3538 goto ud_error2;
3539 }
3540 mp->b_cont = NULL;
3541
3542 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3543 error = EADDRNOTAVAIL;
3544 goto ud_error2;
3545 }
3546
3547 /*
3548 * All Solaris components should pass a db_credp
3549 * for this TPI message, hence we should ASSERT.
3550 * However, RPC (svc_clts_ksend) does this odd thing where it
3551 * passes the options from a T_UNITDATA_IND unchanged in a
3552 * T_UNITDATA_REQ. While that is the right thing to do for
3553 * some options, SCM_UCRED being the key one, this also makes it
3554 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3555 */
3556 cr = msg_getcred(mp, &pid);
3557 if (cr == NULL) {
3558 cr = connp->conn_cred;
3559 pid = connp->conn_cpid;
3560 }
3561
3562 /*
3563 * If a port has not been bound to the stream, fail.
3564 * This is not a problem when sockfs is directly
3565 * above us, because it will ensure that the socket
3566 * is first bound before allowing data to be sent.
3567 */
3568 if (udp->udp_state == TS_UNBND) {
3569 error = EPROTO;
3570 goto ud_error2;
3571 }
3572 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3573 addrlen = tudr->DEST_length;
3574
3575 switch (connp->conn_family) {
3576 case AF_INET6:
3577 sin6 = (sin6_t *)addr;
3578 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3579 (sin6->sin6_family != AF_INET6)) {
3580 error = EADDRNOTAVAIL;
3581 goto ud_error2;
3582 }
3583
3584 srcid = sin6->__sin6_src_id;
3585 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3586 /*
3587 * Destination is a non-IPv4-compatible IPv6 address.
3588 * Send out an IPv6 format packet.
3589 */
3590
3591 /*
3592 * If the local address is a mapped address return
3593 * an error.
3594 * It would be possible to send an IPv6 packet but the
3595 * response would never make it back to the application
3596 * since it is bound to a mapped address.
3597 */
3598 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3599 error = EADDRNOTAVAIL;
3600 goto ud_error2;
3601 }
3602
3603 UDP_DBGSTAT(us, udp_out_ipv6);
3604
3605 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3606 sin6->sin6_addr = ipv6_loopback;
3607 ipversion = IPV6_VERSION;
3608 } else {
3609 if (connp->conn_ipv6_v6only) {
3610 error = EADDRNOTAVAIL;
3611 goto ud_error2;
3612 }
3613
3614 /*
3615 * If the local address is not zero or a mapped address
3616 * return an error. It would be possible to send an
3617 * IPv4 packet but the response would never make it
3618 * back to the application since it is bound to a
3619 * non-mapped address.
3620 */
3621 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3622 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3623 error = EADDRNOTAVAIL;
3624 goto ud_error2;
3625 }
3626 UDP_DBGSTAT(us, udp_out_mapped);
3627
3628 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3629 V4_PART_OF_V6(sin6->sin6_addr) =
3630 htonl(INADDR_LOOPBACK);
3631 }
3632 ipversion = IPV4_VERSION;
3633 }
3634
3635 if (tudr->OPT_length != 0) {
3636 /*
3637 * If we are connected then the destination needs to be
3638 * the same as the connected one.
3639 */
3640 if (udp->udp_state == TS_DATA_XFER &&
3641 !conn_same_as_last_v6(connp, sin6)) {
3642 error = EISCONN;
3643 goto ud_error2;
3644 }
3645 UDP_STAT(us, udp_out_opt);
3646 error = udp_output_ancillary(connp, NULL, sin6,
3647 data_mp, mp, NULL, cr, pid);
3648 } else {
3649 ip_xmit_attr_t *ixa;
3650
3651 /*
3652 * We have to allocate an ip_xmit_attr_t before we grab
3653 * conn_lock and we need to hold conn_lock once we've
3654 * checked conn_same_as_last_v6 to handle concurrent
3655 * send* calls on a socket.
3656 */
3657 ixa = conn_get_ixa(connp, B_FALSE);
3658 if (ixa == NULL) {
3659 error = ENOMEM;
3660 goto ud_error2;
3661 }
3662 mutex_enter(&connp->conn_lock);
3663
3664 if (conn_same_as_last_v6(connp, sin6) &&
3665 connp->conn_lastsrcid == srcid &&
3666 ipsec_outbound_policy_current(ixa)) {
3667 UDP_DBGSTAT(us, udp_out_lastdst);
3668 /* udp_output_lastdst drops conn_lock */
3669 error = udp_output_lastdst(connp, data_mp, cr,
3670 pid, ixa);
3671 } else {
3672 UDP_DBGSTAT(us, udp_out_diffdst);
3673 /* udp_output_newdst drops conn_lock */
3674 error = udp_output_newdst(connp, data_mp, NULL,
3675 sin6, ipversion, cr, pid, ixa);
3676 }
3677 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3678 }
3679 if (error == 0) {
3680 freeb(mp);
3681 return;
3682 }
3683 break;
3684
3685 case AF_INET:
3686 sin = (sin_t *)addr;
3687 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3688 (sin->sin_family != AF_INET)) {
3689 error = EADDRNOTAVAIL;
3690 goto ud_error2;
3691 }
3692 UDP_DBGSTAT(us, udp_out_ipv4);
3693 if (sin->sin_addr.s_addr == INADDR_ANY)
3694 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3695 ipversion = IPV4_VERSION;
3696
3697 srcid = 0;
3698 if (tudr->OPT_length != 0) {
3699 /*
3700 * If we are connected then the destination needs to be
3701 * the same as the connected one.
3702 */
3703 if (udp->udp_state == TS_DATA_XFER &&
3704 !conn_same_as_last_v4(connp, sin)) {
3705 error = EISCONN;
3706 goto ud_error2;
3707 }
3708 UDP_STAT(us, udp_out_opt);
3709 error = udp_output_ancillary(connp, sin, NULL,
3710 data_mp, mp, NULL, cr, pid);
3711 } else {
3712 ip_xmit_attr_t *ixa;
3713
3714 /*
3715 * We have to allocate an ip_xmit_attr_t before we grab
3716 * conn_lock and we need to hold conn_lock once we've
3717 * checked conn_same_as_last_v4 to handle concurrent
3718 * send* calls on a socket.
3719 */
3720 ixa = conn_get_ixa(connp, B_FALSE);
3721 if (ixa == NULL) {
3722 error = ENOMEM;
3723 goto ud_error2;
3724 }
3725 mutex_enter(&connp->conn_lock);
3726
3727 if (conn_same_as_last_v4(connp, sin) &&
3728 ipsec_outbound_policy_current(ixa)) {
3729 UDP_DBGSTAT(us, udp_out_lastdst);
3730 /* udp_output_lastdst drops conn_lock */
3731 error = udp_output_lastdst(connp, data_mp, cr,
3732 pid, ixa);
3733 } else {
3734 UDP_DBGSTAT(us, udp_out_diffdst);
3735 /* udp_output_newdst drops conn_lock */
3736 error = udp_output_newdst(connp, data_mp, sin,
3737 NULL, ipversion, cr, pid, ixa);
3738 }
3739 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3740 }
3741 if (error == 0) {
3742 freeb(mp);
3743 return;
3744 }
3745 break;
3746 }
3747 UDP_STAT(us, udp_out_err_output);
3748 ASSERT(mp != NULL);
3749 /* mp is freed by the following routine */
3750 udp_ud_err(q, mp, (t_scalar_t)error);
3751 return;
3752
3753 ud_error2:
3754 UDPS_BUMP_MIB(us, udpOutErrors);
3755 freemsg(data_mp);
3756 UDP_STAT(us, udp_out_err_output);
3757 ASSERT(mp != NULL);
3758 /* mp is freed by the following routine */
3759 udp_ud_err(q, mp, (t_scalar_t)error);
3760 }
3761
3762 /*
3763 * Handle the case of the IP address, port, flow label being different
3764 * for both IPv4 and IPv6.
3765 *
3766 * NOTE: The caller must hold conn_lock and we drop it here.
3767 */
3768 static int
udp_output_newdst(conn_t * connp,mblk_t * data_mp,sin_t * sin,sin6_t * sin6,ushort_t ipversion,cred_t * cr,pid_t pid,ip_xmit_attr_t * ixa)3769 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3770 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3771 {
3772 uint_t srcid;
3773 uint32_t flowinfo;
3774 udp_t *udp = connp->conn_udp;
3775 int error = 0;
3776 ip_xmit_attr_t *oldixa;
3777 udp_stack_t *us = udp->udp_us;
3778 in6_addr_t v6src;
3779 in6_addr_t v6dst;
3780 in6_addr_t v6nexthop;
3781 in_port_t dstport;
3782
3783 ASSERT(MUTEX_HELD(&connp->conn_lock));
3784 ASSERT(ixa != NULL);
3785 /*
3786 * We hold conn_lock across all the use and modifications of
3787 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3788 * stay consistent.
3789 */
3790
3791 ASSERT(cr != NULL);
3792 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3793 ixa->ixa_cred = cr;
3794 ixa->ixa_cpid = pid;
3795 if (is_system_labeled()) {
3796 /* We need to restart with a label based on the cred */
3797 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3798 }
3799
3800 /*
3801 * If we are connected then the destination needs to be the
3802 * same as the connected one, which is not the case here since we
3803 * checked for that above.
3804 */
3805 if (udp->udp_state == TS_DATA_XFER) {
3806 mutex_exit(&connp->conn_lock);
3807 error = EISCONN;
3808 goto ud_error;
3809 }
3810
3811 /* In case previous destination was multicast or multirt */
3812 ip_attr_newdst(ixa);
3813
3814 /*
3815 * If laddr is unspecified then we look at sin6_src_id.
3816 * We will give precedence to a source address set with IPV6_PKTINFO
3817 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3818 * want ip_attr_connect to select a source (since it can fail) when
3819 * IPV6_PKTINFO is specified.
3820 * If this doesn't result in a source address then we get a source
3821 * from ip_attr_connect() below.
3822 */
3823 v6src = connp->conn_saddr_v6;
3824 if (sin != NULL) {
3825 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3826 dstport = sin->sin_port;
3827 flowinfo = 0;
3828 /* Don't bother with ip_srcid_find_id(), but indicate anyway. */
3829 srcid = 0;
3830 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3831 ixa->ixa_flags |= IXAF_IS_IPV4;
3832 } else {
3833 boolean_t v4mapped;
3834
3835 v6dst = sin6->sin6_addr;
3836 dstport = sin6->sin6_port;
3837 flowinfo = sin6->sin6_flowinfo;
3838 srcid = sin6->__sin6_src_id;
3839 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3840 ixa->ixa_scopeid = sin6->sin6_scope_id;
3841 ixa->ixa_flags |= IXAF_SCOPEID_SET;
3842 } else {
3843 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3844 }
3845 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
3846 if (v4mapped)
3847 ixa->ixa_flags |= IXAF_IS_IPV4;
3848 else
3849 ixa->ixa_flags &= ~IXAF_IS_IPV4;
3850 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3851 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3852 v4mapped, connp->conn_netstack)) {
3853 /* Mismatched v4mapped/v6 specified by srcid. */
3854 mutex_exit(&connp->conn_lock);
3855 error = EADDRNOTAVAIL;
3856 goto ud_error;
3857 }
3858 }
3859 }
3860 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3861 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) {
3862 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
3863
3864 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3865 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3866 v6src = ipp->ipp_addr;
3867 } else {
3868 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3869 v6src = ipp->ipp_addr;
3870 }
3871 }
3872
3873 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
3874 mutex_exit(&connp->conn_lock);
3875
3876 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3877 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3878 switch (error) {
3879 case 0:
3880 break;
3881 case EADDRNOTAVAIL:
3882 /*
3883 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3884 * Don't have the application see that errno
3885 */
3886 error = ENETUNREACH;
3887 goto failed;
3888 case ENETDOWN:
3889 /*
3890 * Have !ipif_addr_ready address; drop packet silently
3891 * until we can get applications to not send until we
3892 * are ready.
3893 */
3894 error = 0;
3895 goto failed;
3896 case EHOSTUNREACH:
3897 case ENETUNREACH:
3898 if (ixa->ixa_ire != NULL) {
3899 /*
3900 * Let conn_ip_output/ire_send_noroute return
3901 * the error and send any local ICMP error.
3902 */
3903 error = 0;
3904 break;
3905 }
3906 /* FALLTHRU */
3907 failed:
3908 default:
3909 goto ud_error;
3910 }
3911
3912
3913 /*
3914 * Cluster note: we let the cluster hook know that we are sending to a
3915 * new address and/or port.
3916 */
3917 if (cl_inet_connect2 != NULL) {
3918 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
3919 if (error != 0) {
3920 error = EHOSTUNREACH;
3921 goto ud_error;
3922 }
3923 }
3924
3925 mutex_enter(&connp->conn_lock);
3926 /*
3927 * While we dropped the lock some other thread might have connected
3928 * this socket. If so we bail out with EISCONN to ensure that the
3929 * connecting thread is the one that updates conn_ixa, conn_ht_*
3930 * and conn_*last*.
3931 */
3932 if (udp->udp_state == TS_DATA_XFER) {
3933 mutex_exit(&connp->conn_lock);
3934 error = EISCONN;
3935 goto ud_error;
3936 }
3937
3938 /*
3939 * We need to rebuild the headers if
3940 * - we are labeling packets (could be different for different
3941 * destinations)
3942 * - we have a source route (or routing header) since we need to
3943 * massage that to get the pseudo-header checksum
3944 * - the IP version is different than the last time
3945 * - a socket option with COA_HEADER_CHANGED has been set which
3946 * set conn_v6lastdst to zero.
3947 *
3948 * Otherwise the prepend function will just update the src, dst,
3949 * dstport, and flow label.
3950 */
3951 if (is_system_labeled()) {
3952 /* TX MLP requires SCM_UCRED and don't have that here */
3953 if (connp->conn_mlp_type != mlptSingle) {
3954 mutex_exit(&connp->conn_lock);
3955 error = ECONNREFUSED;
3956 goto ud_error;
3957 }
3958 /*
3959 * Check whether Trusted Solaris policy allows communication
3960 * with this host, and pretend that the destination is
3961 * unreachable if not.
3962 * Compute any needed label and place it in ipp_label_v4/v6.
3963 *
3964 * Later conn_build_hdr_template/conn_prepend_hdr takes
3965 * ipp_label_v4/v6 to form the packet.
3966 *
3967 * Tsol note: Since we hold conn_lock we know no other
3968 * thread manipulates conn_xmit_ipp.
3969 */
3970 error = conn_update_label(connp, ixa, &v6dst,
3971 &connp->conn_xmit_ipp);
3972 if (error != 0) {
3973 mutex_exit(&connp->conn_lock);
3974 goto ud_error;
3975 }
3976 /* Rebuild the header template */
3977 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
3978 flowinfo);
3979 if (error != 0) {
3980 mutex_exit(&connp->conn_lock);
3981 goto ud_error;
3982 }
3983 } else if ((connp->conn_xmit_ipp.ipp_fields &
3984 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
3985 ipversion != connp->conn_lastipversion ||
3986 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
3987 /* Rebuild the header template */
3988 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
3989 flowinfo);
3990 if (error != 0) {
3991 mutex_exit(&connp->conn_lock);
3992 goto ud_error;
3993 }
3994 } else {
3995 /* Simply update the destination address if no source route */
3996 if (ixa->ixa_flags & IXAF_IS_IPV4) {
3997 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc;
3998
3999 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
4000 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
4001 ipha->ipha_fragment_offset_and_flags |=
4002 IPH_DF_HTONS;
4003 } else {
4004 ipha->ipha_fragment_offset_and_flags &=
4005 ~IPH_DF_HTONS;
4006 }
4007 } else {
4008 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
4009 ip6h->ip6_dst = v6dst;
4010 }
4011 }
4012
4013 /*
4014 * Remember the dst/dstport etc which corresponds to the built header
4015 * template and conn_ixa.
4016 */
4017 oldixa = conn_replace_ixa(connp, ixa);
4018 connp->conn_v6lastdst = v6dst;
4019 connp->conn_lastipversion = ipversion;
4020 connp->conn_lastdstport = dstport;
4021 connp->conn_lastflowinfo = flowinfo;
4022 connp->conn_lastscopeid = ixa->ixa_scopeid;
4023 connp->conn_lastsrcid = srcid;
4024 /* Also remember a source to use together with lastdst */
4025 connp->conn_v6lastsrc = v6src;
4026
4027 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
4028 dstport, flowinfo, &error);
4029
4030 /* Done with conn_t */
4031 mutex_exit(&connp->conn_lock);
4032 ixa_refrele(oldixa);
4033
4034 if (data_mp == NULL) {
4035 ASSERT(error != 0);
4036 goto ud_error;
4037 }
4038
4039 /* We're done. Pass the packet to ip. */
4040 UDPS_BUMP_MIB(us, udpHCOutDatagrams);
4041
4042 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
4043 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *,
4044 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]);
4045
4046 error = conn_ip_output(data_mp, ixa);
4047 /* No udpOutErrors if an error since IP increases its error counter */
4048 switch (error) {
4049 case 0:
4050 break;
4051 case EWOULDBLOCK:
4052 (void) ixa_check_drain_insert(connp, ixa);
4053 error = 0;
4054 break;
4055 case EADDRNOTAVAIL:
4056 /*
4057 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4058 * Don't have the application see that errno
4059 */
4060 error = ENETUNREACH;
4061 /* FALLTHRU */
4062 default:
4063 mutex_enter(&connp->conn_lock);
4064 /*
4065 * Clear the source and v6lastdst so we call ip_attr_connect
4066 * for the next packet and try to pick a better source.
4067 */
4068 if (connp->conn_mcbc_bind)
4069 connp->conn_saddr_v6 = ipv6_all_zeros;
4070 else
4071 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
4072 connp->conn_v6lastdst = ipv6_all_zeros;
4073 mutex_exit(&connp->conn_lock);
4074 break;
4075 }
4076 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4077 ixa->ixa_cred = connp->conn_cred; /* Restore */
4078 ixa->ixa_cpid = connp->conn_cpid;
4079 ixa_refrele(ixa);
4080 return (error);
4081
4082 ud_error:
4083 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4084 ixa->ixa_cred = connp->conn_cred; /* Restore */
4085 ixa->ixa_cpid = connp->conn_cpid;
4086 ixa_refrele(ixa);
4087
4088 freemsg(data_mp);
4089 UDPS_BUMP_MIB(us, udpOutErrors);
4090 UDP_STAT(us, udp_out_err_output);
4091 return (error);
4092 }
4093
4094 /* ARGSUSED */
4095 static void
udp_wput_fallback(queue_t * wq,mblk_t * mp)4096 udp_wput_fallback(queue_t *wq, mblk_t *mp)
4097 {
4098 #ifdef DEBUG
4099 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
4100 #endif
4101 freemsg(mp);
4102 }
4103
4104
4105 /*
4106 * Handle special out-of-band ioctl requests (see PSARC/2008/265).
4107 */
4108 static void
udp_wput_cmdblk(queue_t * q,mblk_t * mp)4109 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4110 {
4111 void *data;
4112 mblk_t *datamp = mp->b_cont;
4113 conn_t *connp = Q_TO_CONN(q);
4114 udp_t *udp = connp->conn_udp;
4115 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4116
4117 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4118 cmdp->cb_error = EPROTO;
4119 qreply(q, mp);
4120 return;
4121 }
4122 data = datamp->b_rptr;
4123
4124 mutex_enter(&connp->conn_lock);
4125 switch (cmdp->cb_cmd) {
4126 case TI_GETPEERNAME:
4127 if (udp->udp_state != TS_DATA_XFER)
4128 cmdp->cb_error = ENOTCONN;
4129 else
4130 cmdp->cb_error = conn_getpeername(connp, data,
4131 &cmdp->cb_len);
4132 break;
4133 case TI_GETMYNAME:
4134 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4135 break;
4136 default:
4137 cmdp->cb_error = EINVAL;
4138 break;
4139 }
4140 mutex_exit(&connp->conn_lock);
4141
4142 qreply(q, mp);
4143 }
4144
4145 static void
udp_use_pure_tpi(udp_t * udp)4146 udp_use_pure_tpi(udp_t *udp)
4147 {
4148 conn_t *connp = udp->udp_connp;
4149
4150 mutex_enter(&connp->conn_lock);
4151 udp->udp_issocket = B_FALSE;
4152 mutex_exit(&connp->conn_lock);
4153 UDP_STAT(udp->udp_us, udp_sock_fallback);
4154 }
4155
4156 static void
udp_wput_other(queue_t * q,mblk_t * mp)4157 udp_wput_other(queue_t *q, mblk_t *mp)
4158 {
4159 uchar_t *rptr = mp->b_rptr;
4160 struct iocblk *iocp;
4161 conn_t *connp = Q_TO_CONN(q);
4162 udp_t *udp = connp->conn_udp;
4163 cred_t *cr;
4164
4165 switch (mp->b_datap->db_type) {
4166 case M_CMD:
4167 udp_wput_cmdblk(q, mp);
4168 return;
4169
4170 case M_PROTO:
4171 case M_PCPROTO:
4172 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4173 /*
4174 * If the message does not contain a PRIM_type,
4175 * throw it away.
4176 */
4177 freemsg(mp);
4178 return;
4179 }
4180 switch (((t_primp_t)rptr)->type) {
4181 case T_ADDR_REQ:
4182 udp_addr_req(q, mp);
4183 return;
4184 case O_T_BIND_REQ:
4185 case T_BIND_REQ:
4186 udp_tpi_bind(q, mp);
4187 return;
4188 case T_CONN_REQ:
4189 udp_tpi_connect(q, mp);
4190 return;
4191 case T_CAPABILITY_REQ:
4192 udp_capability_req(q, mp);
4193 return;
4194 case T_INFO_REQ:
4195 udp_info_req(q, mp);
4196 return;
4197 case T_UNITDATA_REQ:
4198 /*
4199 * If a T_UNITDATA_REQ gets here, the address must
4200 * be bad. Valid T_UNITDATA_REQs are handled
4201 * in udp_wput.
4202 */
4203 udp_ud_err(q, mp, EADDRNOTAVAIL);
4204 return;
4205 case T_UNBIND_REQ:
4206 udp_tpi_unbind(q, mp);
4207 return;
4208 case T_SVR4_OPTMGMT_REQ:
4209 /*
4210 * All Solaris components should pass a db_credp
4211 * for this TPI message, hence we ASSERT.
4212 * But in case there is some other M_PROTO that looks
4213 * like a TPI message sent by some other kernel
4214 * component, we check and return an error.
4215 */
4216 cr = msg_getcred(mp, NULL);
4217 ASSERT(cr != NULL);
4218 if (cr == NULL) {
4219 udp_err_ack(q, mp, TSYSERR, EINVAL);
4220 return;
4221 }
4222 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
4223 cr)) {
4224 svr4_optcom_req(q, mp, cr, &udp_opt_obj);
4225 }
4226 return;
4227
4228 case T_OPTMGMT_REQ:
4229 /*
4230 * All Solaris components should pass a db_credp
4231 * for this TPI message, hence we ASSERT.
4232 * But in case there is some other M_PROTO that looks
4233 * like a TPI message sent by some other kernel
4234 * component, we check and return an error.
4235 */
4236 cr = msg_getcred(mp, NULL);
4237 ASSERT(cr != NULL);
4238 if (cr == NULL) {
4239 udp_err_ack(q, mp, TSYSERR, EINVAL);
4240 return;
4241 }
4242 tpi_optcom_req(q, mp, cr, &udp_opt_obj);
4243 return;
4244
4245 case T_DISCON_REQ:
4246 udp_tpi_disconnect(q, mp);
4247 return;
4248
4249 /* The following TPI message is not supported by udp. */
4250 case O_T_CONN_RES:
4251 case T_CONN_RES:
4252 udp_err_ack(q, mp, TNOTSUPPORT, 0);
4253 return;
4254
4255 /* The following 3 TPI requests are illegal for udp. */
4256 case T_DATA_REQ:
4257 case T_EXDATA_REQ:
4258 case T_ORDREL_REQ:
4259 udp_err_ack(q, mp, TNOTSUPPORT, 0);
4260 return;
4261 default:
4262 break;
4263 }
4264 break;
4265 case M_FLUSH:
4266 if (*rptr & FLUSHW)
4267 flushq(q, FLUSHDATA);
4268 break;
4269 case M_IOCTL:
4270 iocp = (struct iocblk *)mp->b_rptr;
4271 switch (iocp->ioc_cmd) {
4272 case TI_GETPEERNAME:
4273 if (udp->udp_state != TS_DATA_XFER) {
4274 /*
4275 * If a default destination address has not
4276 * been associated with the stream, then we
4277 * don't know the peer's name.
4278 */
4279 iocp->ioc_error = ENOTCONN;
4280 iocp->ioc_count = 0;
4281 mp->b_datap->db_type = M_IOCACK;
4282 qreply(q, mp);
4283 return;
4284 }
4285 /* FALLTHRU */
4286 case TI_GETMYNAME:
4287 /*
4288 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4289 * need to copyin the user's strbuf structure.
4290 * Processing will continue in the M_IOCDATA case
4291 * below.
4292 */
4293 mi_copyin(q, mp, NULL,
4294 SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4295 return;
4296 case _SIOCSOCKFALLBACK:
4297 /*
4298 * Either sockmod is about to be popped and the
4299 * socket would now be treated as a plain stream,
4300 * or a module is about to be pushed so we have
4301 * to follow pure TPI semantics.
4302 */
4303 if (!udp->udp_issocket) {
4304 DB_TYPE(mp) = M_IOCNAK;
4305 iocp->ioc_error = EINVAL;
4306 } else {
4307 udp_use_pure_tpi(udp);
4308
4309 DB_TYPE(mp) = M_IOCACK;
4310 iocp->ioc_error = 0;
4311 }
4312 iocp->ioc_count = 0;
4313 iocp->ioc_rval = 0;
4314 qreply(q, mp);
4315 return;
4316 default:
4317 break;
4318 }
4319 break;
4320 case M_IOCDATA:
4321 udp_wput_iocdata(q, mp);
4322 return;
4323 default:
4324 /* Unrecognized messages are passed through without change. */
4325 break;
4326 }
4327 ip_wput_nondata(q, mp);
4328 }
4329
4330 /*
4331 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4332 * messages.
4333 */
4334 static void
udp_wput_iocdata(queue_t * q,mblk_t * mp)4335 udp_wput_iocdata(queue_t *q, mblk_t *mp)
4336 {
4337 mblk_t *mp1;
4338 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
4339 STRUCT_HANDLE(strbuf, sb);
4340 uint_t addrlen;
4341 conn_t *connp = Q_TO_CONN(q);
4342 udp_t *udp = connp->conn_udp;
4343
4344 /* Make sure it is one of ours. */
4345 switch (iocp->ioc_cmd) {
4346 case TI_GETMYNAME:
4347 case TI_GETPEERNAME:
4348 break;
4349 default:
4350 ip_wput_nondata(q, mp);
4351 return;
4352 }
4353
4354 switch (mi_copy_state(q, mp, &mp1)) {
4355 case -1:
4356 return;
4357 case MI_COPY_CASE(MI_COPY_IN, 1):
4358 break;
4359 case MI_COPY_CASE(MI_COPY_OUT, 1):
4360 /*
4361 * The address has been copied out, so now
4362 * copyout the strbuf.
4363 */
4364 mi_copyout(q, mp);
4365 return;
4366 case MI_COPY_CASE(MI_COPY_OUT, 2):
4367 /*
4368 * The address and strbuf have been copied out.
4369 * We're done, so just acknowledge the original
4370 * M_IOCTL.
4371 */
4372 mi_copy_done(q, mp, 0);
4373 return;
4374 default:
4375 /*
4376 * Something strange has happened, so acknowledge
4377 * the original M_IOCTL with an EPROTO error.
4378 */
4379 mi_copy_done(q, mp, EPROTO);
4380 return;
4381 }
4382
4383 /*
4384 * Now we have the strbuf structure for TI_GETMYNAME
4385 * and TI_GETPEERNAME. Next we copyout the requested
4386 * address and then we'll copyout the strbuf.
4387 */
4388 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
4389
4390 if (connp->conn_family == AF_INET)
4391 addrlen = sizeof (sin_t);
4392 else
4393 addrlen = sizeof (sin6_t);
4394
4395 if (STRUCT_FGET(sb, maxlen) < addrlen) {
4396 mi_copy_done(q, mp, EINVAL);
4397 return;
4398 }
4399
4400 switch (iocp->ioc_cmd) {
4401 case TI_GETMYNAME:
4402 break;
4403 case TI_GETPEERNAME:
4404 if (udp->udp_state != TS_DATA_XFER) {
4405 mi_copy_done(q, mp, ENOTCONN);
4406 return;
4407 }
4408 break;
4409 }
4410 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4411 if (!mp1)
4412 return;
4413
4414 STRUCT_FSET(sb, len, addrlen);
4415 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4416 case TI_GETMYNAME:
4417 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4418 &addrlen);
4419 break;
4420 case TI_GETPEERNAME:
4421 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4422 &addrlen);
4423 break;
4424 }
4425 mp1->b_wptr += addrlen;
4426 /* Copy out the address */
4427 mi_copyout(q, mp);
4428 }
4429
4430 void
udp_ddi_g_init(void)4431 udp_ddi_g_init(void)
4432 {
4433 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
4434 udp_opt_obj.odb_opt_arr_cnt);
4435
4436 /*
4437 * We want to be informed each time a stack is created or
4438 * destroyed in the kernel, so we can maintain the
4439 * set of udp_stack_t's.
4440 */
4441 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
4442 }
4443
4444 void
udp_ddi_g_destroy(void)4445 udp_ddi_g_destroy(void)
4446 {
4447 netstack_unregister(NS_UDP);
4448 }
4449
4450 #define INET_NAME "ip"
4451
4452 /*
4453 * Initialize the UDP stack instance.
4454 */
4455 static void *
udp_stack_init(netstackid_t stackid,netstack_t * ns)4456 udp_stack_init(netstackid_t stackid, netstack_t *ns)
4457 {
4458 udp_stack_t *us;
4459 int i;
4460 int error = 0;
4461 major_t major;
4462 size_t arrsz;
4463
4464 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
4465 us->us_netstack = ns;
4466
4467 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
4468 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
4469 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
4470 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
4471
4472 /*
4473 * The smallest anonymous port in the priviledged port range which UDP
4474 * looks for free port. Use in the option UDP_ANONPRIVBIND.
4475 */
4476 us->us_min_anonpriv_port = 512;
4477
4478 us->us_bind_fanout_size = udp_bind_fanout_size;
4479
4480 /* Roundup variable that might have been modified in /etc/system */
4481 if (!ISP2(us->us_bind_fanout_size)) {
4482 /* Not a power of two. Round up to nearest power of two */
4483 for (i = 0; i < 31; i++) {
4484 if (us->us_bind_fanout_size < (1 << i))
4485 break;
4486 }
4487 us->us_bind_fanout_size = 1 << i;
4488 }
4489 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
4490 sizeof (udp_fanout_t), KM_SLEEP);
4491 for (i = 0; i < us->us_bind_fanout_size; i++) {
4492 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
4493 NULL);
4494 }
4495
4496 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
4497 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
4498 KM_SLEEP);
4499 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
4500
4501 /* Allocate the per netstack stats */
4502 mutex_enter(&cpu_lock);
4503 us->us_sc_cnt = MAX(ncpus, boot_ncpus);
4504 mutex_exit(&cpu_lock);
4505 us->us_sc = kmem_zalloc(max_ncpus * sizeof (udp_stats_cpu_t *),
4506 KM_SLEEP);
4507 for (i = 0; i < us->us_sc_cnt; i++) {
4508 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4509 KM_SLEEP);
4510 }
4511
4512 us->us_kstat = udp_kstat2_init(stackid);
4513 us->us_mibkp = udp_kstat_init(stackid);
4514
4515 major = mod_name_to_major(INET_NAME);
4516 error = ldi_ident_from_major(major, &us->us_ldi_ident);
4517 ASSERT(error == 0);
4518 return (us);
4519 }
4520
4521 /*
4522 * Free the UDP stack instance.
4523 */
4524 static void
udp_stack_fini(netstackid_t stackid,void * arg)4525 udp_stack_fini(netstackid_t stackid, void *arg)
4526 {
4527 udp_stack_t *us = (udp_stack_t *)arg;
4528 int i;
4529
4530 for (i = 0; i < us->us_bind_fanout_size; i++) {
4531 mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4532 }
4533
4534 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4535 sizeof (udp_fanout_t));
4536
4537 us->us_bind_fanout = NULL;
4538
4539 for (i = 0; i < us->us_sc_cnt; i++)
4540 kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t));
4541 kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *));
4542
4543 kmem_free(us->us_propinfo_tbl,
4544 udp_propinfo_count * sizeof (mod_prop_info_t));
4545 us->us_propinfo_tbl = NULL;
4546
4547 udp_kstat_fini(stackid, us->us_mibkp);
4548 us->us_mibkp = NULL;
4549
4550 udp_kstat2_fini(stackid, us->us_kstat);
4551 us->us_kstat = NULL;
4552
4553 mutex_destroy(&us->us_epriv_port_lock);
4554 ldi_ident_release(us->us_ldi_ident);
4555 kmem_free(us, sizeof (*us));
4556 }
4557
4558 static size_t
udp_set_rcv_hiwat(udp_t * udp,size_t size)4559 udp_set_rcv_hiwat(udp_t *udp, size_t size)
4560 {
4561 udp_stack_t *us = udp->udp_us;
4562
4563 /* We add a bit of extra buffering */
4564 size += size >> 1;
4565 if (size > us->us_max_buf)
4566 size = us->us_max_buf;
4567
4568 udp->udp_rcv_hiwat = size;
4569 return (size);
4570 }
4571
4572 /*
4573 * For the lower queue so that UDP can be a dummy mux.
4574 * Nobody should be sending
4575 * packets up this stream
4576 */
4577 static void
udp_lrput(queue_t * q,mblk_t * mp)4578 udp_lrput(queue_t *q, mblk_t *mp)
4579 {
4580 switch (mp->b_datap->db_type) {
4581 case M_FLUSH:
4582 /* Turn around */
4583 if (*mp->b_rptr & FLUSHW) {
4584 *mp->b_rptr &= ~FLUSHR;
4585 qreply(q, mp);
4586 return;
4587 }
4588 break;
4589 }
4590 freemsg(mp);
4591 }
4592
4593 /*
4594 * For the lower queue so that UDP can be a dummy mux.
4595 * Nobody should be sending packets down this stream.
4596 */
4597 /* ARGSUSED */
4598 void
udp_lwput(queue_t * q,mblk_t * mp)4599 udp_lwput(queue_t *q, mblk_t *mp)
4600 {
4601 freemsg(mp);
4602 }
4603
4604 /*
4605 * When a CPU is added, we need to allocate the per CPU stats struct.
4606 */
4607 void
udp_stack_cpu_add(udp_stack_t * us,processorid_t cpu_seqid)4608 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
4609 {
4610 int i;
4611
4612 if (cpu_seqid < us->us_sc_cnt)
4613 return;
4614 for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
4615 ASSERT(us->us_sc[i] == NULL);
4616 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4617 KM_SLEEP);
4618 }
4619 membar_producer();
4620 us->us_sc_cnt = cpu_seqid + 1;
4621 }
4622
/*
 * The routines below are for the UDP socket module.
 */
4626
4627 static conn_t *
udp_do_open(cred_t * credp,boolean_t isv6,int flags,int * errorp)4628 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
4629 {
4630 udp_t *udp;
4631 conn_t *connp;
4632 zoneid_t zoneid;
4633 netstack_t *ns;
4634 udp_stack_t *us;
4635 int len;
4636
4637 ASSERT(errorp != NULL);
4638
4639 if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
4640 return (NULL);
4641
4642 ns = netstack_find_by_cred(credp);
4643 ASSERT(ns != NULL);
4644 us = ns->netstack_udp;
4645 ASSERT(us != NULL);
4646
4647 /*
4648 * For exclusive stacks we set the zoneid to zero
4649 * to make UDP operate as if in the global zone.
4650 */
4651 if (ns->netstack_stackid != GLOBAL_NETSTACKID)
4652 zoneid = GLOBAL_ZONEID;
4653 else
4654 zoneid = crgetzoneid(credp);
4655
4656 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
4657
4658 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
4659 if (connp == NULL) {
4660 netstack_rele(ns);
4661 *errorp = ENOMEM;
4662 return (NULL);
4663 }
4664 udp = connp->conn_udp;
4665
4666 /*
4667 * ipcl_conn_create did a netstack_hold. Undo the hold that was
4668 * done by netstack_find_by_cred()
4669 */
4670 netstack_rele(ns);
4671
4672 /*
4673 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4674 * need to lock anything.
4675 */
4676 ASSERT(connp->conn_proto == IPPROTO_UDP);
4677 ASSERT(connp->conn_udp == udp);
4678 ASSERT(udp->udp_connp == connp);
4679
4680 /* Set the initial state of the stream and the privilege status. */
4681 udp->udp_state = TS_UNBND;
4682 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
4683 if (isv6) {
4684 connp->conn_family = AF_INET6;
4685 connp->conn_ipversion = IPV6_VERSION;
4686 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4687 connp->conn_default_ttl = us->us_ipv6_hoplimit;
4688 len = sizeof (ip6_t) + UDPH_SIZE;
4689 } else {
4690 connp->conn_family = AF_INET;
4691 connp->conn_ipversion = IPV4_VERSION;
4692 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4693 connp->conn_default_ttl = us->us_ipv4_ttl;
4694 len = sizeof (ipha_t) + UDPH_SIZE;
4695 }
4696
4697 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
4698 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
4699
4700 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
4701 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
4702 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
4703 connp->conn_ixa->ixa_zoneid = zoneid;
4704
4705 connp->conn_zoneid = zoneid;
4706
4707 /*
4708 * If the caller has the process-wide flag set, then default to MAC
4709 * exempt mode. This allows read-down to unlabeled hosts.
4710 */
4711 if (getpflags(NET_MAC_AWARE, credp) != 0)
4712 connp->conn_mac_mode = CONN_MAC_AWARE;
4713
4714 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
4715
4716 udp->udp_us = us;
4717
4718 connp->conn_rcvbuf = us->us_recv_hiwat;
4719 connp->conn_sndbuf = us->us_xmit_hiwat;
4720 connp->conn_sndlowat = us->us_xmit_lowat;
4721 connp->conn_rcvlowat = udp_mod_info.mi_lowat;
4722
4723 connp->conn_wroff = len + us->us_wroff_extra;
4724 connp->conn_so_type = SOCK_DGRAM;
4725
4726 connp->conn_recv = udp_input;
4727 connp->conn_recvicmp = udp_icmp_input;
4728 crhold(credp);
4729 connp->conn_cred = credp;
4730 connp->conn_cpid = curproc->p_pid;
4731 connp->conn_open_time = ddi_get_lbolt64();
4732 /* Cache things in ixa without an extra refhold */
4733 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
4734 connp->conn_ixa->ixa_cred = connp->conn_cred;
4735 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
4736 if (is_system_labeled())
4737 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
4738
4739 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
4740
4741 if (us->us_pmtu_discovery)
4742 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
4743
4744 return (connp);
4745 }
4746
4747 sock_lower_handle_t
udp_create(int family,int type,int proto,sock_downcalls_t ** sock_downcalls,uint_t * smodep,int * errorp,int flags,cred_t * credp)4748 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
4749 uint_t *smodep, int *errorp, int flags, cred_t *credp)
4750 {
4751 udp_t *udp = NULL;
4752 udp_stack_t *us;
4753 conn_t *connp;
4754 boolean_t isv6;
4755
4756 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
4757 (proto != 0 && proto != IPPROTO_UDP)) {
4758 *errorp = EPROTONOSUPPORT;
4759 return (NULL);
4760 }
4761
4762 if (family == AF_INET6)
4763 isv6 = B_TRUE;
4764 else
4765 isv6 = B_FALSE;
4766
4767 connp = udp_do_open(credp, isv6, flags, errorp);
4768 if (connp == NULL)
4769 return (NULL);
4770
4771 udp = connp->conn_udp;
4772 ASSERT(udp != NULL);
4773 us = udp->udp_us;
4774 ASSERT(us != NULL);
4775
4776 udp->udp_issocket = B_TRUE;
4777 connp->conn_flags |= IPCL_NONSTR;
4778
4779 /*
4780 * Set flow control
4781 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4782 * need to lock anything.
4783 */
4784 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
4785 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
4786
4787 connp->conn_flow_cntrld = B_FALSE;
4788
4789 mutex_enter(&connp->conn_lock);
4790 connp->conn_state_flags &= ~CONN_INCIPIENT;
4791 mutex_exit(&connp->conn_lock);
4792
4793 *errorp = 0;
4794 *smodep = SM_ATOMIC;
4795 *sock_downcalls = &sock_udp_downcalls;
4796 return ((sock_lower_handle_t)connp);
4797 }
4798
4799 /* ARGSUSED3 */
4800 void
udp_activate(sock_lower_handle_t proto_handle,sock_upper_handle_t sock_handle,sock_upcalls_t * sock_upcalls,int flags,cred_t * cr)4801 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
4802 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
4803 {
4804 conn_t *connp = (conn_t *)proto_handle;
4805 struct sock_proto_props sopp;
4806
4807 /* All Solaris components should pass a cred for this operation. */
4808 ASSERT(cr != NULL);
4809
4810 connp->conn_upcalls = sock_upcalls;
4811 connp->conn_upper_handle = sock_handle;
4812
4813 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
4814 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
4815 sopp.sopp_wroff = connp->conn_wroff;
4816 sopp.sopp_maxblk = INFPSZ;
4817 sopp.sopp_rxhiwat = connp->conn_rcvbuf;
4818 sopp.sopp_rxlowat = connp->conn_rcvlowat;
4819 sopp.sopp_maxaddrlen = sizeof (sin6_t);
4820 sopp.sopp_maxpsz =
4821 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
4822 UDP_MAXPACKET_IPV6;
4823 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
4824 udp_mod_info.mi_minpsz;
4825
4826 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
4827 &sopp);
4828 }
4829
4830 static void
udp_do_close(conn_t * connp)4831 udp_do_close(conn_t *connp)
4832 {
4833 udp_t *udp;
4834
4835 ASSERT(connp != NULL && IPCL_IS_UDP(connp));
4836 udp = connp->conn_udp;
4837
4838 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
4839 /*
4840 * Running in cluster mode - register unbind information
4841 */
4842 if (connp->conn_ipversion == IPV4_VERSION) {
4843 (*cl_inet_unbind)(
4844 connp->conn_netstack->netstack_stackid,
4845 IPPROTO_UDP, AF_INET,
4846 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
4847 (in_port_t)connp->conn_lport, NULL);
4848 } else {
4849 (*cl_inet_unbind)(
4850 connp->conn_netstack->netstack_stackid,
4851 IPPROTO_UDP, AF_INET6,
4852 (uint8_t *)&(connp->conn_laddr_v6),
4853 (in_port_t)connp->conn_lport, NULL);
4854 }
4855 }
4856
4857 udp_bind_hash_remove(udp, B_FALSE);
4858 udp_reuselist_remove(connp);
4859
4860 ip_quiesce_conn(connp);
4861
4862 if (!IPCL_IS_NONSTR(connp)) {
4863 ASSERT(connp->conn_wq != NULL);
4864 ASSERT(connp->conn_rq != NULL);
4865 qprocsoff(connp->conn_rq);
4866 }
4867
4868 udp_close_free(connp);
4869
4870 /*
4871 * Now we are truly single threaded on this stream, and can
4872 * delete the things hanging off the connp, and finally the connp.
4873 * We removed this connp from the fanout list, it cannot be
4874 * accessed thru the fanouts, and we already waited for the
4875 * conn_ref to drop to 0. We are already in close, so
4876 * there cannot be any other thread from the top. qprocsoff
4877 * has completed, and service has completed or won't run in
4878 * future.
4879 */
4880 ASSERT(connp->conn_ref == 1);
4881
4882 if (!IPCL_IS_NONSTR(connp)) {
4883 inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
4884 } else {
4885 ip_free_helper_stream(connp);
4886 }
4887
4888 connp->conn_ref--;
4889 ipcl_conn_destroy(connp);
4890 }
4891
4892 /* ARGSUSED1 */
4893 int
udp_close(sock_lower_handle_t proto_handle,int flags,cred_t * cr)4894 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
4895 {
4896 conn_t *connp = (conn_t *)proto_handle;
4897
4898 /* All Solaris components should pass a cred for this operation. */
4899 ASSERT(cr != NULL);
4900
4901 udp_do_close(connp);
4902 return (0);
4903 }
4904
4905 static int
udp_do_bind(conn_t * connp,struct sockaddr * sa,socklen_t len,cred_t * cr,boolean_t bind_to_req_port_only)4906 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
4907 boolean_t bind_to_req_port_only)
4908 {
4909 sin_t *sin;
4910 sin6_t *sin6;
4911 udp_t *udp = connp->conn_udp;
4912 int error = 0;
4913 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */
4914 in_port_t port; /* Host byte order */
4915 in_port_t requested_port; /* Host byte order */
4916 int count;
4917 ipaddr_t v4src; /* Set if AF_INET */
4918 in6_addr_t v6src;
4919 int loopmax;
4920 udp_fanout_t *udpf;
4921 in_port_t lport; /* Network byte order */
4922 uint_t scopeid = 0;
4923 zoneid_t zoneid = IPCL_ZONEID(connp);
4924 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
4925 boolean_t is_inaddr_any;
4926 mlp_type_t addrtype, mlptype;
4927 udp_stack_t *us = udp->udp_us;
4928 struct reuselist *reusep;
4929
4930 switch (len) {
4931 case sizeof (sin_t): /* Complete IPv4 address */
4932 sin = (sin_t *)sa;
4933
4934 if (sin == NULL || !OK_32PTR((char *)sin))
4935 return (EINVAL);
4936
4937 if (connp->conn_family != AF_INET ||
4938 sin->sin_family != AF_INET) {
4939 return (EAFNOSUPPORT);
4940 }
4941 v4src = sin->sin_addr.s_addr;
4942 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
4943 if (v4src != INADDR_ANY) {
4944 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
4945 B_TRUE);
4946 }
4947 port = ntohs(sin->sin_port);
4948 break;
4949
4950 case sizeof (sin6_t): /* complete IPv6 address */
4951 sin6 = (sin6_t *)sa;
4952
4953 if (sin6 == NULL || !OK_32PTR((char *)sin6))
4954 return (EINVAL);
4955
4956 if (connp->conn_family != AF_INET6 ||
4957 sin6->sin6_family != AF_INET6) {
4958 return (EAFNOSUPPORT);
4959 }
4960 v6src = sin6->sin6_addr;
4961 if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
4962 if (connp->conn_ipv6_v6only)
4963 return (EADDRNOTAVAIL);
4964
4965 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
4966 if (v4src != INADDR_ANY) {
4967 laddr_type = ip_laddr_verify_v4(v4src,
4968 zoneid, ipst, B_FALSE);
4969 }
4970 } else {
4971 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
4972 if (IN6_IS_ADDR_LINKSCOPE(&v6src))
4973 scopeid = sin6->sin6_scope_id;
4974 laddr_type = ip_laddr_verify_v6(&v6src,
4975 zoneid, ipst, B_TRUE, scopeid);
4976 }
4977 }
4978 port = ntohs(sin6->sin6_port);
4979 break;
4980
4981 default: /* Invalid request */
4982 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
4983 "udp_bind: bad ADDR_length length %u", len);
4984 return (-TBADADDR);
4985 }
4986
4987 /* Is the local address a valid unicast, multicast, or broadcast? */
4988 if (laddr_type == IPVL_BAD)
4989 return (EADDRNOTAVAIL);
4990
4991 requested_port = port;
4992
4993 if (requested_port == 0 || !bind_to_req_port_only)
4994 bind_to_req_port_only = B_FALSE;
4995 else /* T_BIND_REQ and requested_port != 0 */
4996 bind_to_req_port_only = B_TRUE;
4997
4998 if (requested_port == 0) {
4999 /*
5000 * If the application passed in zero for the port number, it
5001 * doesn't care which port number we bind to. Get one in the
5002 * valid range.
5003 */
5004 if (connp->conn_anon_priv_bind) {
5005 port = udp_get_next_priv_port(udp);
5006 } else {
5007 port = udp_update_next_port(udp,
5008 us->us_next_port_to_try, B_TRUE);
5009 }
5010 } else {
5011 /*
5012 * If the port is in the well-known privileged range,
5013 * make sure the caller was privileged.
5014 */
5015 int i;
5016 boolean_t priv = B_FALSE;
5017
5018 if (port < us->us_smallest_nonpriv_port) {
5019 priv = B_TRUE;
5020 } else {
5021 for (i = 0; i < us->us_num_epriv_ports; i++) {
5022 if (port == us->us_epriv_ports[i]) {
5023 priv = B_TRUE;
5024 break;
5025 }
5026 }
5027 }
5028
5029 if (priv) {
5030 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
5031 return (-TACCES);
5032 }
5033 }
5034
5035 if (port == 0)
5036 return (-TNOADDR);
5037
5038 /*
5039 * get some memory we might need later on for reuseport, avoid
5040 * KM_SLEEP under lock
5041 */
5042 reusep = kmem_zalloc(sizeof (*reusep), KM_SLEEP);
5043 mutex_init(&reusep->ru_lock, NULL, MUTEX_DEFAULT, NULL);
5044
5045 mutex_enter(&connp->conn_lock);
5046
5047 if (!connp->conn_reuseport) {
5048 mutex_destroy(&reusep->ru_lock);
5049 kmem_free(reusep, sizeof (*reusep));
5050 reusep = NULL;
5051 }
5052
5053 /*
5054 * The state must be TS_UNBND. TPI mandates that users must send
5055 * TPI primitives only 1 at a time and wait for the response before
5056 * sending the next primitive.
5057 */
5058 if (udp->udp_state != TS_UNBND) {
5059 mutex_exit(&connp->conn_lock);
5060 if (reusep != NULL) {
5061 mutex_destroy(&reusep->ru_lock);
5062 kmem_free(reusep, sizeof (*reusep));
5063 }
5064 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5065 "udp_bind: bad state, %u", udp->udp_state);
5066 return (-TOUTSTATE);
5067 }
5068 /*
5069 * Copy the source address into our udp structure. This address
5070 * may still be zero; if so, IP will fill in the correct address
5071 * each time an outbound packet is passed to it. Since the udp is
5072 * not yet in the bind hash list, we don't grab the uf_lock to
5073 * change conn_ipversion
5074 */
5075 if (connp->conn_family == AF_INET) {
5076 ASSERT(sin != NULL);
5077 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
5078 } else {
5079 if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5080 /*
5081 * no need to hold the uf_lock to set the conn_ipversion
5082 * since we are not yet in the fanout list
5083 */
5084 connp->conn_ipversion = IPV4_VERSION;
5085 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
5086 } else {
5087 connp->conn_ipversion = IPV6_VERSION;
5088 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
5089 }
5090 }
5091
5092 /*
5093 * If conn_reuseaddr is not set, then we have to make sure that
5094 * the IP address and port number the application requested
5095 * (or we selected for the application) is not being used by
5096 * another stream. If another stream is already using the
5097 * requested IP address and port, the behavior depends on
5098 * "bind_to_req_port_only". If set the bind fails; otherwise we
5099 * search for any an unused port to bind to the stream.
5100 *
5101 * As per the BSD semantics, as modified by the Deering multicast
5102 * changes, if udp_reuseaddr is set, then we allow multiple binds
5103 * to the same port independent of the local IP address.
5104 *
5105 * This is slightly different than in SunOS 4.X which did not
5106 * support IP multicast. Note that the change implemented by the
5107 * Deering multicast code effects all binds - not only binding
5108 * to IP multicast addresses.
5109 *
5110 * Note that when binding to port zero we ignore SO_REUSEADDR in
5111 * order to guarantee a unique port.
5112 */
5113
5114 count = 0;
5115 if (connp->conn_anon_priv_bind) {
5116 /*
5117 * loopmax = (IPPORT_RESERVED-1) -
5118 * us->us_min_anonpriv_port + 1
5119 */
5120 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
5121 } else {
5122 loopmax = us->us_largest_anon_port -
5123 us->us_smallest_anon_port + 1;
5124 }
5125
5126 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
5127
5128 for (;;) {
5129 udp_t *udp1;
5130 boolean_t found_exclbind = B_FALSE;
5131 conn_t *connp1;
5132
5133 /*
5134 * Walk through the list of udp streams bound to
5135 * requested port with the same IP address.
5136 */
5137 lport = htons(port);
5138 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
5139 us->us_bind_fanout_size)];
5140 mutex_enter(&udpf->uf_lock);
5141 for (udp1 = udpf->uf_udp; udp1 != NULL;
5142 udp1 = udp1->udp_bind_hash) {
5143 connp1 = udp1->udp_connp;
5144
5145 if (lport != connp1->conn_lport)
5146 continue;
5147
5148 /*
5149 * On a labeled system, we must treat bindings to ports
5150 * on shared IP addresses by sockets with MAC exemption
5151 * privilege as being in all zones, as there's
5152 * otherwise no way to identify the right receiver.
5153 */
5154 if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
5155 continue;
5156
5157 /*
5158 * If UDP_EXCLBIND is set for either the bound or
5159 * binding endpoint, the semantics of bind
5160 * is changed according to the following chart.
5161 *
5162 * spec = specified address (v4 or v6)
5163 * unspec = unspecified address (v4 or v6)
5164 * A = specified addresses are different for endpoints
5165 *
5166 * bound bind to allowed?
5167 * -------------------------------------
5168 * unspec unspec no
5169 * unspec spec no
5170 * spec unspec no
5171 * spec spec yes if A
5172 *
5173 * For labeled systems, SO_MAC_EXEMPT behaves the same
5174 * as UDP_EXCLBIND, except that zoneid is ignored.
5175 */
5176 if (connp1->conn_exclbind || connp->conn_exclbind ||
5177 IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
5178 if (V6_OR_V4_INADDR_ANY(
5179 connp1->conn_bound_addr_v6) ||
5180 is_inaddr_any ||
5181 IN6_ARE_ADDR_EQUAL(
5182 &connp1->conn_bound_addr_v6,
5183 &v6src)) {
5184 found_exclbind = B_TRUE;
5185 break;
5186 }
5187 continue;
5188 }
5189
5190 /*
5191 * Check ipversion to allow IPv4 and IPv6 sockets to
5192 * have disjoint port number spaces.
5193 */
5194 if (connp->conn_ipversion != connp1->conn_ipversion) {
5195
5196 /*
5197 * On the first time through the loop, if the
5198 * the user intentionally specified a
5199 * particular port number, then ignore any
5200 * bindings of the other protocol that may
5201 * conflict. This allows the user to bind IPv6
5202 * alone and get both v4 and v6, or bind both
5203 * both and get each seperately. On subsequent
5204 * times through the loop, we're checking a
5205 * port that we chose (not the user) and thus
5206 * we do not allow casual duplicate bindings.
5207 */
5208 if (count == 0 && requested_port != 0)
5209 continue;
5210 }
5211
5212 /*
5213 * No difference depending on SO_REUSEADDR.
5214 *
5215 * If existing port is bound to a
5216 * non-wildcard IP address and
5217 * the requesting stream is bound to
5218 * a distinct different IP addresses
5219 * (non-wildcard, also), keep going.
5220 */
5221 if (!is_inaddr_any &&
5222 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
5223 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
5224 &v6src)) {
5225 continue;
5226 }
5227
5228 /*
5229 * if bound conn has reuseport set and conn requests
5230 * reuseport, check if cred matches. If they match,
5231 * allow conn to proceed.
5232 */
5233 if (connp->conn_reuseport && connp1->conn_reuseport) {
5234 cred_t *bcred = connp1->conn_cred;
5235 cred_t *ncred = connp->conn_cred;
5236 if (crgetuid(bcred) == crgetuid(ncred) &&
5237 crgetzoneid(bcred) == crgetzoneid(ncred)) {
5238 (void) udp_reuselist_add(reusep, connp1,
5239 B_FALSE);
5240 continue;
5241 }
5242 }
5243
5244 break;
5245 }
5246
5247
5248 if (!found_exclbind &&
5249 (connp->conn_reuseaddr && requested_port != 0)) {
5250 if (reusep != NULL) {
5251 mutex_destroy(&reusep->ru_lock);
5252 kmem_free(reusep, sizeof (*reusep));
5253 }
5254 break;
5255 }
5256
5257 if (udp1 == NULL) {
5258 /*
5259 * No other stream has this IP address and port number
5260 * or all have reuseport set. We can use it.
5261 */
5262 if (connp->conn_reuseport) {
5263 struct reuselist *old = NULL;
5264
5265 if (reusep->ru_entries > 0) {
5266 old = reusep->ru_conns[0]->
5267 conn_reuselist;
5268 ASSERT(old != NULL);
5269 }
5270 if (old != NULL) {
5271 int i;
5272
5273 mutex_enter(&reusep->ru_lock);
5274 for (i = 0; i < old->ru_entries; ++i) {
5275 ASSERT(old->ru_conns[i]->
5276 conn_reuselist == old);
5277 old->ru_conns[i]->conn_reuselist
5278 = reusep;
5279 }
5280 mutex_exit(&reusep->ru_lock);
5281 }
5282 if (udp_reuselist_add(reusep, connp, B_TRUE)
5283 < 0) {
5284 /*
5285 * table full, reject request. As we
5286 * have already replaced the table,
5287 * leave the new one in the conns and
5288 * free the old
5289 */
5290 if (old != NULL) {
5291 mutex_destroy(&old->ru_lock);
5292 kmem_free(old,
5293 sizeof (*reusep));
5294 }
5295 mutex_exit(&udpf->uf_lock);
5296 mutex_exit(&connp->conn_lock);
5297 return (-TADDRBUSY);
5298 }
5299 connp->conn_reuselist = reusep;
5300 if (old != NULL) {
5301 mutex_destroy(&old->ru_lock);
5302 kmem_free(old, sizeof (*reusep));
5303 }
5304 }
5305 break;
5306 }
5307 mutex_exit(&udpf->uf_lock);
5308
5309 if (connp->conn_reuseport) {
5310 /* reject for all other cases */
5311 mutex_exit(&connp->conn_lock);
5312 mutex_destroy(&reusep->ru_lock);
5313 kmem_free(reusep, sizeof (*reusep));
5314 return (-TADDRBUSY);
5315 }
5316
5317 if (bind_to_req_port_only) {
5318 /*
5319 * We get here only when requested port
5320 * is bound (and only first of the for()
5321 * loop iteration).
5322 *
5323 * The semantics of this bind request
5324 * require it to fail so we return from
5325 * the routine (and exit the loop).
5326 *
5327 */
5328 mutex_exit(&connp->conn_lock);
5329 return (-TADDRBUSY);
5330 }
5331
5332 if (connp->conn_anon_priv_bind) {
5333 port = udp_get_next_priv_port(udp);
5334 } else {
5335 if ((count == 0) && (requested_port != 0)) {
5336 /*
5337 * If the application wants us to find
5338 * a port, get one to start with. Set
5339 * requested_port to 0, so that we will
5340 * update us->us_next_port_to_try below.
5341 */
5342 port = udp_update_next_port(udp,
5343 us->us_next_port_to_try, B_TRUE);
5344 requested_port = 0;
5345 } else {
5346 port = udp_update_next_port(udp, port + 1,
5347 B_FALSE);
5348 }
5349 }
5350
5351 if (port == 0 || ++count >= loopmax) {
5352 /*
5353 * We've tried every possible port number and
5354 * there are none available, so send an error
5355 * to the user.
5356 */
5357 mutex_exit(&connp->conn_lock);
5358 return (-TNOADDR);
5359 }
5360 }
5361
5362 /*
5363 * Copy the source address into our udp structure. This address
5364 * may still be zero; if so, ip_attr_connect will fill in the correct
5365 * address when a packet is about to be sent.
5366 * If we are binding to a broadcast or multicast address then
5367 * we just set the conn_bound_addr since we don't want to use
5368 * that as the source address when sending.
5369 */
5370 connp->conn_bound_addr_v6 = v6src;
5371 connp->conn_laddr_v6 = v6src;
5372 if (scopeid != 0) {
5373 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
5374 connp->conn_ixa->ixa_scopeid = scopeid;
5375 connp->conn_incoming_ifindex = scopeid;
5376 } else {
5377 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5378 connp->conn_incoming_ifindex = connp->conn_bound_if;
5379 }
5380
5381 switch (laddr_type) {
5382 case IPVL_UNICAST_UP:
5383 case IPVL_UNICAST_DOWN:
5384 connp->conn_saddr_v6 = v6src;
5385 connp->conn_mcbc_bind = B_FALSE;
5386 break;
5387 case IPVL_MCAST:
5388 case IPVL_BCAST:
5389 /* ip_set_destination will pick a source address later */
5390 connp->conn_saddr_v6 = ipv6_all_zeros;
5391 connp->conn_mcbc_bind = B_TRUE;
5392 break;
5393 }
5394
5395 /* Any errors after this point should use late_error */
5396 connp->conn_lport = lport;
5397
5398 /*
5399 * Now reset the next anonymous port if the application requested
5400 * an anonymous port, or we handed out the next anonymous port.
5401 */
5402 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
5403 us->us_next_port_to_try = port + 1;
5404 }
5405
5406 /* Initialize the T_BIND_ACK. */
5407 if (connp->conn_family == AF_INET) {
5408 sin->sin_port = connp->conn_lport;
5409 } else {
5410 sin6->sin6_port = connp->conn_lport;
5411 }
5412 udp->udp_state = TS_IDLE;
5413 udp_bind_hash_insert(udpf, udp);
5414 mutex_exit(&udpf->uf_lock);
5415 mutex_exit(&connp->conn_lock);
5416
5417 if (cl_inet_bind) {
5418 /*
5419 * Running in cluster mode - register bind information
5420 */
5421 if (connp->conn_ipversion == IPV4_VERSION) {
5422 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5423 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
5424 (in_port_t)connp->conn_lport, NULL);
5425 } else {
5426 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5427 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
5428 (in_port_t)connp->conn_lport, NULL);
5429 }
5430 }
5431
5432 mutex_enter(&connp->conn_lock);
5433 connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
5434 if (is_system_labeled() && (!connp->conn_anon_port ||
5435 connp->conn_anon_mlp)) {
5436 uint16_t mlpport;
5437 zone_t *zone;
5438
5439 zone = crgetzone(cr);
5440 connp->conn_mlp_type =
5441 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
5442 mlptSingle;
5443 addrtype = tsol_mlp_addr_type(
5444 connp->conn_allzones ? ALL_ZONES : zone->zone_id,
5445 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
5446 if (addrtype == mlptSingle) {
5447 error = -TNOADDR;
5448 mutex_exit(&connp->conn_lock);
5449 goto late_error;
5450 }
5451 mlpport = connp->conn_anon_port ? PMAPPORT : port;
5452 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
5453 addrtype);
5454
5455 /*
5456 * It is a coding error to attempt to bind an MLP port
5457 * without first setting SOL_SOCKET/SCM_UCRED.
5458 */
5459 if (mlptype != mlptSingle &&
5460 connp->conn_mlp_type == mlptSingle) {
5461 error = EINVAL;
5462 mutex_exit(&connp->conn_lock);
5463 goto late_error;
5464 }
5465
5466 /*
5467 * It is an access violation to attempt to bind an MLP port
5468 * without NET_BINDMLP privilege.
5469 */
5470 if (mlptype != mlptSingle &&
5471 secpolicy_net_bindmlp(cr) != 0) {
5472 if (connp->conn_debug) {
5473 (void) strlog(UDP_MOD_ID, 0, 1,
5474 SL_ERROR|SL_TRACE,
5475 "udp_bind: no priv for multilevel port %d",
5476 mlpport);
5477 }
5478 error = -TACCES;
5479 mutex_exit(&connp->conn_lock);
5480 goto late_error;
5481 }
5482
5483 /*
5484 * If we're specifically binding a shared IP address and the
5485 * port is MLP on shared addresses, then check to see if this
5486 * zone actually owns the MLP. Reject if not.
5487 */
5488 if (mlptype == mlptShared && addrtype == mlptShared) {
5489 /*
5490 * No need to handle exclusive-stack zones since
5491 * ALL_ZONES only applies to the shared stack.
5492 */
5493 zoneid_t mlpzone;
5494
5495 mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
5496 htons(mlpport));
5497 if (connp->conn_zoneid != mlpzone) {
5498 if (connp->conn_debug) {
5499 (void) strlog(UDP_MOD_ID, 0, 1,
5500 SL_ERROR|SL_TRACE,
5501 "udp_bind: attempt to bind port "
5502 "%d on shared addr in zone %d "
5503 "(should be %d)",
5504 mlpport, connp->conn_zoneid,
5505 mlpzone);
5506 }
5507 error = -TACCES;
5508 mutex_exit(&connp->conn_lock);
5509 goto late_error;
5510 }
5511 }
5512 if (connp->conn_anon_port) {
5513 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
5514 port, B_TRUE);
5515 if (error != 0) {
5516 if (connp->conn_debug) {
5517 (void) strlog(UDP_MOD_ID, 0, 1,
5518 SL_ERROR|SL_TRACE,
5519 "udp_bind: cannot establish anon "
5520 "MLP for port %d", port);
5521 }
5522 error = -TACCES;
5523 mutex_exit(&connp->conn_lock);
5524 goto late_error;
5525 }
5526 }
5527 connp->conn_mlp_type = mlptype;
5528 }
5529
5530 /*
5531 * We create an initial header template here to make a subsequent
5532 * sendto have a starting point. Since conn_last_dst is zero the
5533 * first sendto will always follow the 'dst changed' code path.
5534 * Note that we defer massaging options and the related checksum
5535 * adjustment until we have a destination address.
5536 */
5537 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5538 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5539 if (error != 0) {
5540 mutex_exit(&connp->conn_lock);
5541 goto late_error;
5542 }
5543 /* Just in case */
5544 connp->conn_faddr_v6 = ipv6_all_zeros;
5545 connp->conn_fport = 0;
5546 connp->conn_v6lastdst = ipv6_all_zeros;
5547 mutex_exit(&connp->conn_lock);
5548
5549 error = ip_laddr_fanout_insert(connp);
5550 if (error != 0)
5551 goto late_error;
5552
5553 /* Bind succeeded */
5554 return (0);
5555
5556 late_error:
5557 /* We had already picked the port number, and then the bind failed */
5558 mutex_enter(&connp->conn_lock);
5559 udpf = &us->us_bind_fanout[
5560 UDP_BIND_HASH(connp->conn_lport,
5561 us->us_bind_fanout_size)];
5562 mutex_enter(&udpf->uf_lock);
5563 connp->conn_saddr_v6 = ipv6_all_zeros;
5564 connp->conn_bound_addr_v6 = ipv6_all_zeros;
5565 connp->conn_laddr_v6 = ipv6_all_zeros;
5566 if (scopeid != 0) {
5567 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5568 connp->conn_incoming_ifindex = connp->conn_bound_if;
5569 }
5570 udp->udp_state = TS_UNBND;
5571 udp_bind_hash_remove(udp, B_TRUE);
5572 udp_reuselist_remove(connp);
5573 connp->conn_lport = 0;
5574 mutex_exit(&udpf->uf_lock);
5575 connp->conn_anon_port = B_FALSE;
5576 connp->conn_mlp_type = mlptSingle;
5577
5578 connp->conn_v6lastdst = ipv6_all_zeros;
5579
5580 /* Restore the header that was built above - different source address */
5581 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5582 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5583 mutex_exit(&connp->conn_lock);
5584 return (error);
5585 }
5586
5587 int
udp_bind(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t len,cred_t * cr)5588 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5589 socklen_t len, cred_t *cr)
5590 {
5591 int error;
5592 conn_t *connp;
5593
5594 /* All Solaris components should pass a cred for this operation. */
5595 ASSERT(cr != NULL);
5596
5597 connp = (conn_t *)proto_handle;
5598
5599 if (sa == NULL)
5600 error = udp_do_unbind(connp);
5601 else
5602 error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5603
5604 if (error < 0) {
5605 if (error == -TOUTSTATE)
5606 error = EINVAL;
5607 else
5608 error = proto_tlitosyserr(-error);
5609 }
5610
5611 return (error);
5612 }
5613
5614 static int
udp_implicit_bind(conn_t * connp,cred_t * cr)5615 udp_implicit_bind(conn_t *connp, cred_t *cr)
5616 {
5617 sin6_t sin6addr;
5618 sin_t *sin;
5619 sin6_t *sin6;
5620 socklen_t len;
5621 int error;
5622
5623 /* All Solaris components should pass a cred for this operation. */
5624 ASSERT(cr != NULL);
5625
5626 if (connp->conn_family == AF_INET) {
5627 len = sizeof (struct sockaddr_in);
5628 sin = (sin_t *)&sin6addr;
5629 *sin = sin_null;
5630 sin->sin_family = AF_INET;
5631 sin->sin_addr.s_addr = INADDR_ANY;
5632 } else {
5633 ASSERT(connp->conn_family == AF_INET6);
5634 len = sizeof (sin6_t);
5635 sin6 = (sin6_t *)&sin6addr;
5636 *sin6 = sin6_null;
5637 sin6->sin6_family = AF_INET6;
5638 V6_SET_ZERO(sin6->sin6_addr);
5639 }
5640
5641 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5642 cr, B_FALSE);
5643 return ((error < 0) ? proto_tlitosyserr(-error) : error);
5644 }
5645
5646 /*
5647 * This routine removes a port number association from a stream. It
5648 * is called by udp_unbind and udp_tpi_unbind.
5649 */
5650 static int
udp_do_unbind(conn_t * connp)5651 udp_do_unbind(conn_t *connp)
5652 {
5653 udp_t *udp = connp->conn_udp;
5654 udp_fanout_t *udpf;
5655 udp_stack_t *us = udp->udp_us;
5656
5657 if (cl_inet_unbind != NULL) {
5658 /*
5659 * Running in cluster mode - register unbind information
5660 */
5661 if (connp->conn_ipversion == IPV4_VERSION) {
5662 (*cl_inet_unbind)(
5663 connp->conn_netstack->netstack_stackid,
5664 IPPROTO_UDP, AF_INET,
5665 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5666 (in_port_t)connp->conn_lport, NULL);
5667 } else {
5668 (*cl_inet_unbind)(
5669 connp->conn_netstack->netstack_stackid,
5670 IPPROTO_UDP, AF_INET6,
5671 (uint8_t *)&(connp->conn_laddr_v6),
5672 (in_port_t)connp->conn_lport, NULL);
5673 }
5674 }
5675
5676 mutex_enter(&connp->conn_lock);
5677 /* If a bind has not been done, we can't unbind. */
5678 if (udp->udp_state == TS_UNBND) {
5679 mutex_exit(&connp->conn_lock);
5680 return (-TOUTSTATE);
5681 }
5682 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5683 us->us_bind_fanout_size)];
5684 mutex_enter(&udpf->uf_lock);
5685 udp_bind_hash_remove(udp, B_TRUE);
5686 udp_reuselist_remove(connp);
5687 connp->conn_saddr_v6 = ipv6_all_zeros;
5688 connp->conn_bound_addr_v6 = ipv6_all_zeros;
5689 connp->conn_laddr_v6 = ipv6_all_zeros;
5690 connp->conn_mcbc_bind = B_FALSE;
5691 connp->conn_lport = 0;
5692 /* In case we were also connected */
5693 connp->conn_faddr_v6 = ipv6_all_zeros;
5694 connp->conn_fport = 0;
5695 mutex_exit(&udpf->uf_lock);
5696
5697 connp->conn_v6lastdst = ipv6_all_zeros;
5698 udp->udp_state = TS_UNBND;
5699
5700 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5701 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5702 mutex_exit(&connp->conn_lock);
5703
5704 ip_unbind(connp);
5705
5706 return (0);
5707 }
5708
5709 /*
5710 * It associates a default destination address with the stream.
5711 */
5712 static int
udp_do_connect(conn_t * connp,const struct sockaddr * sa,socklen_t len,cred_t * cr,pid_t pid)5713 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
5714 cred_t *cr, pid_t pid)
5715 {
5716 sin6_t *sin6;
5717 sin_t *sin;
5718 in6_addr_t v6dst;
5719 ipaddr_t v4dst;
5720 uint16_t dstport;
5721 uint32_t flowinfo;
5722 udp_fanout_t *udpf;
5723 udp_t *udp, *udp1;
5724 ushort_t ipversion;
5725 udp_stack_t *us;
5726 int error;
5727 conn_t *connp1;
5728 ip_xmit_attr_t *ixa;
5729 ip_xmit_attr_t *oldixa;
5730 uint_t scopeid = 0;
5731 uint_t srcid = 0;
5732 in6_addr_t v6src = connp->conn_saddr_v6;
5733 boolean_t v4mapped;
5734
5735 udp = connp->conn_udp;
5736 us = udp->udp_us;
5737
5738 /*
5739 * Address has been verified by the caller
5740 */
5741 switch (len) {
5742 default:
5743 /*
5744 * Should never happen
5745 */
5746 return (EINVAL);
5747
5748 case sizeof (sin_t):
5749 sin = (sin_t *)sa;
5750 v4dst = sin->sin_addr.s_addr;
5751 dstport = sin->sin_port;
5752 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5753 ASSERT(connp->conn_ipversion == IPV4_VERSION);
5754 ipversion = IPV4_VERSION;
5755 break;
5756
5757 case sizeof (sin6_t):
5758 sin6 = (sin6_t *)sa;
5759 v6dst = sin6->sin6_addr;
5760 dstport = sin6->sin6_port;
5761 srcid = sin6->__sin6_src_id;
5762 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
5763 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5764 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5765 v4mapped, connp->conn_netstack)) {
5766 /* Mismatch v4mapped/v6 specified by srcid. */
5767 return (EADDRNOTAVAIL);
5768 }
5769 }
5770 if (v4mapped) {
5771 if (connp->conn_ipv6_v6only)
5772 return (EADDRNOTAVAIL);
5773
5774 /*
5775 * Destination adress is mapped IPv6 address.
5776 * Source bound address should be unspecified or
5777 * IPv6 mapped address as well.
5778 */
5779 if (!IN6_IS_ADDR_UNSPECIFIED(
5780 &connp->conn_bound_addr_v6) &&
5781 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
5782 return (EADDRNOTAVAIL);
5783 }
5784 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
5785 ipversion = IPV4_VERSION;
5786 flowinfo = 0;
5787 } else {
5788 ipversion = IPV6_VERSION;
5789 flowinfo = sin6->sin6_flowinfo;
5790 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
5791 scopeid = sin6->sin6_scope_id;
5792 }
5793 break;
5794 }
5795
5796 if (dstport == 0)
5797 return (-TBADADDR);
5798
5799 /*
5800 * If there is a different thread using conn_ixa then we get a new
5801 * copy and cut the old one loose from conn_ixa. Otherwise we use
5802 * conn_ixa and prevent any other thread from using/changing it.
5803 * Once connect() is done other threads can use conn_ixa since the
5804 * refcnt will be back at one.
5805 * We defer updating conn_ixa until later to handle any concurrent
5806 * conn_ixa_cleanup thread.
5807 */
5808 ixa = conn_get_ixa(connp, B_FALSE);
5809 if (ixa == NULL)
5810 return (ENOMEM);
5811
5812 mutex_enter(&connp->conn_lock);
5813 /*
5814 * This udp_t must have bound to a port already before doing a connect.
5815 * Reject if a connect is in progress (we drop conn_lock during
5816 * udp_do_connect).
5817 */
5818 if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
5819 mutex_exit(&connp->conn_lock);
5820 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5821 "udp_connect: bad state, %u", udp->udp_state);
5822 ixa_refrele(ixa);
5823 return (-TOUTSTATE);
5824 }
5825 ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
5826
5827 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5828 us->us_bind_fanout_size)];
5829
5830 mutex_enter(&udpf->uf_lock);
5831 if (udp->udp_state == TS_DATA_XFER) {
5832 /* Already connected - clear out state */
5833 if (connp->conn_mcbc_bind)
5834 connp->conn_saddr_v6 = ipv6_all_zeros;
5835 else
5836 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5837 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5838 connp->conn_faddr_v6 = ipv6_all_zeros;
5839 connp->conn_fport = 0;
5840 udp->udp_state = TS_IDLE;
5841 }
5842
5843 connp->conn_fport = dstport;
5844 connp->conn_ipversion = ipversion;
5845 if (ipversion == IPV4_VERSION) {
5846 /*
5847 * Interpret a zero destination to mean loopback.
5848 * Update the T_CONN_REQ (sin/sin6) since it is used to
5849 * generate the T_CONN_CON.
5850 */
5851 if (v4dst == INADDR_ANY) {
5852 v4dst = htonl(INADDR_LOOPBACK);
5853 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5854 if (connp->conn_family == AF_INET) {
5855 sin->sin_addr.s_addr = v4dst;
5856 } else {
5857 sin6->sin6_addr = v6dst;
5858 }
5859 }
5860 connp->conn_faddr_v6 = v6dst;
5861 connp->conn_flowinfo = 0;
5862 } else {
5863 ASSERT(connp->conn_ipversion == IPV6_VERSION);
5864 /*
5865 * Interpret a zero destination to mean loopback.
5866 * Update the T_CONN_REQ (sin/sin6) since it is used to
5867 * generate the T_CONN_CON.
5868 */
5869 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
5870 v6dst = ipv6_loopback;
5871 sin6->sin6_addr = v6dst;
5872 }
5873 connp->conn_faddr_v6 = v6dst;
5874 connp->conn_flowinfo = flowinfo;
5875 }
5876 mutex_exit(&udpf->uf_lock);
5877
5878 /*
5879 * We update our cred/cpid based on the caller of connect
5880 */
5881 if (connp->conn_cred != cr) {
5882 crhold(cr);
5883 crfree(connp->conn_cred);
5884 connp->conn_cred = cr;
5885 }
5886 connp->conn_cpid = pid;
5887 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
5888 ixa->ixa_cred = cr;
5889 ixa->ixa_cpid = pid;
5890 if (is_system_labeled()) {
5891 /* We need to restart with a label based on the cred */
5892 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
5893 }
5894
5895 if (scopeid != 0) {
5896 ixa->ixa_flags |= IXAF_SCOPEID_SET;
5897 ixa->ixa_scopeid = scopeid;
5898 connp->conn_incoming_ifindex = scopeid;
5899 } else {
5900 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5901 connp->conn_incoming_ifindex = connp->conn_bound_if;
5902 }
5903 /*
5904 * conn_connect will drop conn_lock and reacquire it.
5905 * To prevent a send* from messing with this udp_t while the lock
5906 * is dropped we set udp_state and clear conn_v6lastdst.
5907 * That will make all send* fail with EISCONN.
5908 */
5909 connp->conn_v6lastdst = ipv6_all_zeros;
5910 udp->udp_state = TS_WCON_CREQ;
5911
5912 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
5913 mutex_exit(&connp->conn_lock);
5914 if (error != 0)
5915 goto connect_failed;
5916
5917 /*
5918 * The addresses have been verified. Time to insert in
5919 * the correct fanout list.
5920 */
5921 error = ipcl_conn_insert(connp);
5922 if (error != 0)
5923 goto connect_failed;
5924
5925 mutex_enter(&connp->conn_lock);
5926 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5927 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5928 if (error != 0) {
5929 mutex_exit(&connp->conn_lock);
5930 goto connect_failed;
5931 }
5932
5933 udp->udp_state = TS_DATA_XFER;
5934 /* Record this as the "last" send even though we haven't sent any */
5935 connp->conn_v6lastdst = connp->conn_faddr_v6;
5936 connp->conn_lastipversion = connp->conn_ipversion;
5937 connp->conn_lastdstport = connp->conn_fport;
5938 connp->conn_lastflowinfo = connp->conn_flowinfo;
5939 connp->conn_lastscopeid = scopeid;
5940 connp->conn_lastsrcid = srcid;
5941 /* Also remember a source to use together with lastdst */
5942 connp->conn_v6lastsrc = v6src;
5943
5944 oldixa = conn_replace_ixa(connp, ixa);
5945 mutex_exit(&connp->conn_lock);
5946 ixa_refrele(oldixa);
5947
5948 /*
5949 * We've picked a source address above. Now we can
5950 * verify that the src/port/dst/port is unique for all
5951 * connections in TS_DATA_XFER, skipping ourselves.
5952 */
5953 mutex_enter(&udpf->uf_lock);
5954 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
5955 if (udp1->udp_state != TS_DATA_XFER)
5956 continue;
5957
5958 if (udp1 == udp)
5959 continue;
5960
5961 connp1 = udp1->udp_connp;
5962 if (connp->conn_lport != connp1->conn_lport ||
5963 connp->conn_ipversion != connp1->conn_ipversion ||
5964 dstport != connp1->conn_fport ||
5965 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
5966 &connp1->conn_laddr_v6) ||
5967 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
5968 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
5969 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
5970 continue;
5971 mutex_exit(&udpf->uf_lock);
5972 error = -TBADADDR;
5973 goto connect_failed;
5974 }
5975 if (cl_inet_connect2 != NULL) {
5976 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
5977 if (error != 0) {
5978 mutex_exit(&udpf->uf_lock);
5979 error = -TBADADDR;
5980 goto connect_failed;
5981 }
5982 }
5983 mutex_exit(&udpf->uf_lock);
5984
5985 ixa_refrele(ixa);
5986 return (0);
5987
5988 connect_failed:
5989 if (ixa != NULL)
5990 ixa_refrele(ixa);
5991 mutex_enter(&connp->conn_lock);
5992 mutex_enter(&udpf->uf_lock);
5993 udp->udp_state = TS_IDLE;
5994 connp->conn_faddr_v6 = ipv6_all_zeros;
5995 connp->conn_fport = 0;
5996 /* In case the source address was set above */
5997 if (connp->conn_mcbc_bind)
5998 connp->conn_saddr_v6 = ipv6_all_zeros;
5999 else
6000 connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
6001 connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
6002 mutex_exit(&udpf->uf_lock);
6003
6004 connp->conn_v6lastdst = ipv6_all_zeros;
6005 connp->conn_flowinfo = 0;
6006
6007 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
6008 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
6009 mutex_exit(&connp->conn_lock);
6010 return (error);
6011 }
6012
6013 static int
udp_connect(sock_lower_handle_t proto_handle,const struct sockaddr * sa,socklen_t len,sock_connid_t * id,cred_t * cr)6014 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
6015 socklen_t len, sock_connid_t *id, cred_t *cr)
6016 {
6017 conn_t *connp = (conn_t *)proto_handle;
6018 udp_t *udp = connp->conn_udp;
6019 int error;
6020 boolean_t did_bind = B_FALSE;
6021 pid_t pid = curproc->p_pid;
6022
6023 /* All Solaris components should pass a cred for this operation. */
6024 ASSERT(cr != NULL);
6025
6026 if (sa == NULL) {
6027 /*
6028 * Disconnect
6029 * Make sure we are connected
6030 */
6031 if (udp->udp_state != TS_DATA_XFER)
6032 return (EINVAL);
6033
6034 error = udp_disconnect(connp);
6035 return (error);
6036 }
6037
6038 error = proto_verify_ip_addr(connp->conn_family, sa, len);
6039 if (error != 0)
6040 goto done;
6041
6042 /* do an implicit bind if necessary */
6043 if (udp->udp_state == TS_UNBND) {
6044 error = udp_implicit_bind(connp, cr);
6045 /*
6046 * We could be racing with an actual bind, in which case
6047 * we would see EPROTO. We cross our fingers and try
6048 * to connect.
6049 */
6050 if (!(error == 0 || error == EPROTO))
6051 goto done;
6052 did_bind = B_TRUE;
6053 }
6054 /*
6055 * set SO_DGRAM_ERRIND
6056 */
6057 connp->conn_dgram_errind = B_TRUE;
6058
6059 error = udp_do_connect(connp, sa, len, cr, pid);
6060
6061 if (error != 0 && did_bind) {
6062 int unbind_err;
6063
6064 unbind_err = udp_do_unbind(connp);
6065 ASSERT(unbind_err == 0);
6066 }
6067
6068 if (error == 0) {
6069 *id = 0;
6070 (*connp->conn_upcalls->su_connected)
6071 (connp->conn_upper_handle, 0, NULL, -1);
6072 } else if (error < 0) {
6073 error = proto_tlitosyserr(-error);
6074 }
6075
6076 done:
6077 if (error != 0 && udp->udp_state == TS_DATA_XFER) {
6078 /*
6079 * No need to hold locks to set state
6080 * after connect failure socket state is undefined
6081 * We set the state only to imitate old sockfs behavior
6082 */
6083 udp->udp_state = TS_IDLE;
6084 }
6085 return (error);
6086 }
6087
6088 int
udp_send(sock_lower_handle_t proto_handle,mblk_t * mp,struct nmsghdr * msg,cred_t * cr)6089 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
6090 cred_t *cr)
6091 {
6092 sin6_t *sin6;
6093 sin_t *sin = NULL;
6094 uint_t srcid;
6095 conn_t *connp = (conn_t *)proto_handle;
6096 udp_t *udp = connp->conn_udp;
6097 int error = 0;
6098 udp_stack_t *us = udp->udp_us;
6099 ushort_t ipversion;
6100 pid_t pid = curproc->p_pid;
6101 ip_xmit_attr_t *ixa;
6102
6103 ASSERT(DB_TYPE(mp) == M_DATA);
6104
6105 /* All Solaris components should pass a cred for this operation. */
6106 ASSERT(cr != NULL);
6107
6108 /* do an implicit bind if necessary */
6109 if (udp->udp_state == TS_UNBND) {
6110 error = udp_implicit_bind(connp, cr);
6111 /*
6112 * We could be racing with an actual bind, in which case
6113 * we would see EPROTO. We cross our fingers and try
6114 * to connect.
6115 */
6116 if (!(error == 0 || error == EPROTO)) {
6117 freemsg(mp);
6118 return (error);
6119 }
6120 }
6121
6122 /* Connected? */
6123 if (msg->msg_name == NULL) {
6124 if (udp->udp_state != TS_DATA_XFER) {
6125 UDPS_BUMP_MIB(us, udpOutErrors);
6126 return (EDESTADDRREQ);
6127 }
6128 if (msg->msg_controllen != 0) {
6129 error = udp_output_ancillary(connp, NULL, NULL, mp,
6130 NULL, msg, cr, pid);
6131 } else {
6132 error = udp_output_connected(connp, mp, cr, pid);
6133 }
6134 if (us->us_sendto_ignerr)
6135 return (0);
6136 else
6137 return (error);
6138 }
6139 if (udp->udp_state == TS_DATA_XFER) {
6140 UDPS_BUMP_MIB(us, udpOutErrors);
6141 return (EISCONN);
6142 }
6143 error = proto_verify_ip_addr(connp->conn_family,
6144 (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6145 if (error != 0) {
6146 UDPS_BUMP_MIB(us, udpOutErrors);
6147 return (error);
6148 }
6149 switch (connp->conn_family) {
6150 case AF_INET6:
6151 sin6 = (sin6_t *)msg->msg_name;
6152
6153 srcid = sin6->__sin6_src_id;
6154
6155 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
6156 /*
6157 * Destination is a non-IPv4-compatible IPv6 address.
6158 * Send out an IPv6 format packet.
6159 */
6160
6161 /*
6162 * If the local address is a mapped address return
6163 * an error.
6164 * It would be possible to send an IPv6 packet but the
6165 * response would never make it back to the application
6166 * since it is bound to a mapped address.
6167 */
6168 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
6169 UDPS_BUMP_MIB(us, udpOutErrors);
6170 return (EADDRNOTAVAIL);
6171 }
6172 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
6173 sin6->sin6_addr = ipv6_loopback;
6174 ipversion = IPV6_VERSION;
6175 } else {
6176 if (connp->conn_ipv6_v6only) {
6177 UDPS_BUMP_MIB(us, udpOutErrors);
6178 return (EADDRNOTAVAIL);
6179 }
6180
6181 /*
6182 * If the local address is not zero or a mapped address
6183 * return an error. It would be possible to send an
6184 * IPv4 packet but the response would never make it
6185 * back to the application since it is bound to a
6186 * non-mapped address.
6187 */
6188 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
6189 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
6190 UDPS_BUMP_MIB(us, udpOutErrors);
6191 return (EADDRNOTAVAIL);
6192 }
6193
6194 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
6195 V4_PART_OF_V6(sin6->sin6_addr) =
6196 htonl(INADDR_LOOPBACK);
6197 }
6198 ipversion = IPV4_VERSION;
6199 }
6200
6201 /*
6202 * We have to allocate an ip_xmit_attr_t before we grab
6203 * conn_lock and we need to hold conn_lock once we've check
6204 * conn_same_as_last_v6 to handle concurrent send* calls on a
6205 * socket.
6206 */
6207 if (msg->msg_controllen == 0) {
6208 ixa = conn_get_ixa(connp, B_FALSE);
6209 if (ixa == NULL) {
6210 UDPS_BUMP_MIB(us, udpOutErrors);
6211 return (ENOMEM);
6212 }
6213 } else {
6214 ixa = NULL;
6215 }
6216 mutex_enter(&connp->conn_lock);
6217 if (udp->udp_delayed_error != 0) {
6218 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr;
6219
6220 error = udp->udp_delayed_error;
6221 udp->udp_delayed_error = 0;
6222
6223 /* Compare IP address, port, and family */
6224
6225 if (sin6->sin6_port == sin2->sin6_port &&
6226 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6227 &sin2->sin6_addr) &&
6228 sin6->sin6_family == sin2->sin6_family) {
6229 mutex_exit(&connp->conn_lock);
6230 UDPS_BUMP_MIB(us, udpOutErrors);
6231 if (ixa != NULL)
6232 ixa_refrele(ixa);
6233 return (error);
6234 }
6235 }
6236
6237 if (msg->msg_controllen != 0) {
6238 mutex_exit(&connp->conn_lock);
6239 ASSERT(ixa == NULL);
6240 error = udp_output_ancillary(connp, NULL, sin6, mp,
6241 NULL, msg, cr, pid);
6242 } else if (conn_same_as_last_v6(connp, sin6) &&
6243 connp->conn_lastsrcid == srcid &&
6244 ipsec_outbound_policy_current(ixa)) {
6245 /* udp_output_lastdst drops conn_lock */
6246 error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6247 } else {
6248 /* udp_output_newdst drops conn_lock */
6249 error = udp_output_newdst(connp, mp, NULL, sin6,
6250 ipversion, cr, pid, ixa);
6251 }
6252 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6253 if (us->us_sendto_ignerr)
6254 return (0);
6255 else
6256 return (error);
6257 case AF_INET:
6258 sin = (sin_t *)msg->msg_name;
6259
6260 ipversion = IPV4_VERSION;
6261
6262 if (sin->sin_addr.s_addr == INADDR_ANY)
6263 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
6264
6265 /*
6266 * We have to allocate an ip_xmit_attr_t before we grab
6267 * conn_lock and we need to hold conn_lock once we've check
6268 * conn_same_as_last_v6 to handle concurrent send* on a socket.
6269 */
6270 if (msg->msg_controllen == 0) {
6271 ixa = conn_get_ixa(connp, B_FALSE);
6272 if (ixa == NULL) {
6273 UDPS_BUMP_MIB(us, udpOutErrors);
6274 return (ENOMEM);
6275 }
6276 } else {
6277 ixa = NULL;
6278 }
6279 mutex_enter(&connp->conn_lock);
6280 if (udp->udp_delayed_error != 0) {
6281 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr;
6282
6283 error = udp->udp_delayed_error;
6284 udp->udp_delayed_error = 0;
6285
6286 /* Compare IP address and port */
6287
6288 if (sin->sin_port == sin2->sin_port &&
6289 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
6290 mutex_exit(&connp->conn_lock);
6291 UDPS_BUMP_MIB(us, udpOutErrors);
6292 if (ixa != NULL)
6293 ixa_refrele(ixa);
6294 return (error);
6295 }
6296 }
6297 if (msg->msg_controllen != 0) {
6298 mutex_exit(&connp->conn_lock);
6299 ASSERT(ixa == NULL);
6300 error = udp_output_ancillary(connp, sin, NULL, mp,
6301 NULL, msg, cr, pid);
6302 } else if (conn_same_as_last_v4(connp, sin) &&
6303 ipsec_outbound_policy_current(ixa)) {
6304 /* udp_output_lastdst drops conn_lock */
6305 error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6306 } else {
6307 /* udp_output_newdst drops conn_lock */
6308 error = udp_output_newdst(connp, mp, sin, NULL,
6309 ipversion, cr, pid, ixa);
6310 }
6311 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6312 if (us->us_sendto_ignerr)
6313 return (0);
6314 else
6315 return (error);
6316 default:
6317 return (EINVAL);
6318 }
6319 }
6320
6321 int
udp_fallback(sock_lower_handle_t proto_handle,queue_t * q,boolean_t issocket,so_proto_quiesced_cb_t quiesced_cb,sock_quiesce_arg_t * arg)6322 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
6323 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
6324 sock_quiesce_arg_t *arg)
6325 {
6326 conn_t *connp = (conn_t *)proto_handle;
6327 udp_t *udp;
6328 struct T_capability_ack tca;
6329 struct sockaddr_in6 laddr, faddr;
6330 socklen_t laddrlen, faddrlen;
6331 short opts;
6332 struct stroptions *stropt;
6333 mblk_t *mp, *stropt_mp;
6334 int error;
6335
6336 udp = connp->conn_udp;
6337
6338 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
6339
6340 /*
6341 * setup the fallback stream that was allocated
6342 */
6343 connp->conn_dev = (dev_t)RD(q)->q_ptr;
6344 connp->conn_minor_arena = WR(q)->q_ptr;
6345
6346 RD(q)->q_ptr = WR(q)->q_ptr = connp;
6347
6348 WR(q)->q_qinfo = &udp_winit;
6349
6350 connp->conn_rq = RD(q);
6351 connp->conn_wq = WR(q);
6352
6353 /* Notify stream head about options before sending up data */
6354 stropt_mp->b_datap->db_type = M_SETOPTS;
6355 stropt_mp->b_wptr += sizeof (*stropt);
6356 stropt = (struct stroptions *)stropt_mp->b_rptr;
6357 stropt->so_flags = SO_WROFF | SO_HIWAT;
6358 stropt->so_wroff = connp->conn_wroff;
6359 stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
6360 putnext(RD(q), stropt_mp);
6361
6362 /*
6363 * Free the helper stream
6364 */
6365 ip_free_helper_stream(connp);
6366
6367 if (!issocket)
6368 udp_use_pure_tpi(udp);
6369
6370 /*
6371 * Collect the information needed to sync with the sonode
6372 */
6373 udp_do_capability_ack(udp, &tca, TC1_INFO);
6374
6375 laddrlen = faddrlen = sizeof (sin6_t);
6376 (void) udp_getsockname((sock_lower_handle_t)connp,
6377 (struct sockaddr *)&laddr, &laddrlen, CRED());
6378 error = udp_getpeername((sock_lower_handle_t)connp,
6379 (struct sockaddr *)&faddr, &faddrlen, CRED());
6380 if (error != 0)
6381 faddrlen = 0;
6382
6383 opts = 0;
6384 if (connp->conn_dgram_errind)
6385 opts |= SO_DGRAM_ERRIND;
6386 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
6387 opts |= SO_DONTROUTE;
6388
6389 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
6390 (struct sockaddr *)&laddr, laddrlen,
6391 (struct sockaddr *)&faddr, faddrlen, opts);
6392
6393 mutex_enter(&udp->udp_recv_lock);
6394 /*
6395 * Attempts to send data up during fallback will result in it being
6396 * queued in udp_t. First push up the datagrams obtained from the
6397 * socket, then any packets queued in udp_t.
6398 */
6399 if (mp != NULL) {
6400 mp->b_next = udp->udp_fallback_queue_head;
6401 udp->udp_fallback_queue_head = mp;
6402 }
6403 while (udp->udp_fallback_queue_head != NULL) {
6404 mp = udp->udp_fallback_queue_head;
6405 udp->udp_fallback_queue_head = mp->b_next;
6406 mutex_exit(&udp->udp_recv_lock);
6407 mp->b_next = NULL;
6408 putnext(RD(q), mp);
6409 mutex_enter(&udp->udp_recv_lock);
6410 }
6411 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
6412 /*
6413 * No longer a streams less socket
6414 */
6415 mutex_enter(&connp->conn_lock);
6416 connp->conn_flags &= ~IPCL_NONSTR;
6417 mutex_exit(&connp->conn_lock);
6418
6419 mutex_exit(&udp->udp_recv_lock);
6420
6421 ASSERT(connp->conn_ref >= 1);
6422
6423 return (0);
6424 }
6425
6426 /* ARGSUSED3 */
6427 int
udp_getpeername(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t * salenp,cred_t * cr)6428 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6429 socklen_t *salenp, cred_t *cr)
6430 {
6431 conn_t *connp = (conn_t *)proto_handle;
6432 udp_t *udp = connp->conn_udp;
6433 int error;
6434
6435 /* All Solaris components should pass a cred for this operation. */
6436 ASSERT(cr != NULL);
6437
6438 mutex_enter(&connp->conn_lock);
6439 if (udp->udp_state != TS_DATA_XFER)
6440 error = ENOTCONN;
6441 else
6442 error = conn_getpeername(connp, sa, salenp);
6443 mutex_exit(&connp->conn_lock);
6444 return (error);
6445 }
6446
6447 /* ARGSUSED3 */
6448 int
udp_getsockname(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t * salenp,cred_t * cr)6449 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6450 socklen_t *salenp, cred_t *cr)
6451 {
6452 conn_t *connp = (conn_t *)proto_handle;
6453 int error;
6454
6455 /* All Solaris components should pass a cred for this operation. */
6456 ASSERT(cr != NULL);
6457
6458 mutex_enter(&connp->conn_lock);
6459 error = conn_getsockname(connp, sa, salenp);
6460 mutex_exit(&connp->conn_lock);
6461 return (error);
6462 }
6463
6464 int
udp_getsockopt(sock_lower_handle_t proto_handle,int level,int option_name,void * optvalp,socklen_t * optlen,cred_t * cr)6465 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6466 void *optvalp, socklen_t *optlen, cred_t *cr)
6467 {
6468 conn_t *connp = (conn_t *)proto_handle;
6469 int error;
6470 t_uscalar_t max_optbuf_len;
6471 void *optvalp_buf;
6472 int len;
6473
6474 /* All Solaris components should pass a cred for this operation. */
6475 ASSERT(cr != NULL);
6476
6477 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6478 udp_opt_obj.odb_opt_des_arr,
6479 udp_opt_obj.odb_opt_arr_cnt,
6480 B_FALSE, B_TRUE, cr);
6481 if (error != 0) {
6482 if (error < 0)
6483 error = proto_tlitosyserr(-error);
6484 return (error);
6485 }
6486
6487 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6488 len = udp_opt_get(connp, level, option_name, optvalp_buf);
6489 if (len == -1) {
6490 kmem_free(optvalp_buf, max_optbuf_len);
6491 return (EINVAL);
6492 }
6493
6494 /*
6495 * update optlen and copy option value
6496 */
6497 t_uscalar_t size = MIN(len, *optlen);
6498
6499 bcopy(optvalp_buf, optvalp, size);
6500 bcopy(&size, optlen, sizeof (size));
6501
6502 kmem_free(optvalp_buf, max_optbuf_len);
6503 return (0);
6504 }
6505
6506 int
udp_setsockopt(sock_lower_handle_t proto_handle,int level,int option_name,const void * optvalp,socklen_t optlen,cred_t * cr)6507 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6508 const void *optvalp, socklen_t optlen, cred_t *cr)
6509 {
6510 conn_t *connp = (conn_t *)proto_handle;
6511 int error;
6512
6513 /* All Solaris components should pass a cred for this operation. */
6514 ASSERT(cr != NULL);
6515
6516 error = proto_opt_check(level, option_name, optlen, NULL,
6517 udp_opt_obj.odb_opt_des_arr,
6518 udp_opt_obj.odb_opt_arr_cnt,
6519 B_TRUE, B_FALSE, cr);
6520
6521 if (error != 0) {
6522 if (error < 0)
6523 error = proto_tlitosyserr(-error);
6524 return (error);
6525 }
6526
6527 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6528 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6529 NULL, cr);
6530
6531 ASSERT(error >= 0);
6532
6533 return (error);
6534 }
6535
6536 void
udp_clr_flowctrl(sock_lower_handle_t proto_handle)6537 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6538 {
6539 conn_t *connp = (conn_t *)proto_handle;
6540 udp_t *udp = connp->conn_udp;
6541
6542 mutex_enter(&udp->udp_recv_lock);
6543 connp->conn_flow_cntrld = B_FALSE;
6544 mutex_exit(&udp->udp_recv_lock);
6545 }
6546
6547 /* ARGSUSED2 */
6548 int
udp_shutdown(sock_lower_handle_t proto_handle,int how,cred_t * cr)6549 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6550 {
6551 conn_t *connp = (conn_t *)proto_handle;
6552
6553 /* All Solaris components should pass a cred for this operation. */
6554 ASSERT(cr != NULL);
6555
6556 /* shut down the send side */
6557 if (how != SHUT_RD)
6558 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6559 SOCK_OPCTL_SHUT_SEND, 0);
6560 /* shut down the recv side */
6561 if (how != SHUT_WR)
6562 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6563 SOCK_OPCTL_SHUT_RECV, 0);
6564 return (0);
6565 }
6566
6567 int
udp_ioctl(sock_lower_handle_t proto_handle,int cmd,intptr_t arg,int mode,int32_t * rvalp,cred_t * cr)6568 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6569 int mode, int32_t *rvalp, cred_t *cr)
6570 {
6571 conn_t *connp = (conn_t *)proto_handle;
6572 int error;
6573
6574 /* All Solaris components should pass a cred for this operation. */
6575 ASSERT(cr != NULL);
6576
6577 /*
6578 * If we don't have a helper stream then create one.
6579 * ip_create_helper_stream takes care of locking the conn_t,
6580 * so this check for NULL is just a performance optimization.
6581 */
6582 if (connp->conn_helper_info == NULL) {
6583 udp_stack_t *us = connp->conn_udp->udp_us;
6584
6585 ASSERT(us->us_ldi_ident != NULL);
6586
6587 /*
6588 * Create a helper stream for non-STREAMS socket.
6589 */
6590 error = ip_create_helper_stream(connp, us->us_ldi_ident);
6591 if (error != 0) {
6592 ip0dbg(("tcp_ioctl: create of IP helper stream "
6593 "failed %d\n", error));
6594 return (error);
6595 }
6596 }
6597
6598 switch (cmd) {
6599 case _SIOCSOCKFALLBACK:
6600 case TI_GETPEERNAME:
6601 case TI_GETMYNAME:
6602 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6603 cmd));
6604 error = EINVAL;
6605 break;
6606 default:
6607 /*
6608 * Pass on to IP using helper stream
6609 */
6610 error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6611 cmd, arg, mode, cr, rvalp);
6612 break;
6613 }
6614 return (error);
6615 }
6616
6617 /* ARGSUSED */
6618 int
udp_accept(sock_lower_handle_t lproto_handle,sock_lower_handle_t eproto_handle,sock_upper_handle_t sock_handle,cred_t * cr)6619 udp_accept(sock_lower_handle_t lproto_handle,
6620 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
6621 cred_t *cr)
6622 {
6623 return (EOPNOTSUPP);
6624 }
6625
6626 /* ARGSUSED */
6627 int
udp_listen(sock_lower_handle_t proto_handle,int backlog,cred_t * cr)6628 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
6629 {
6630 return (EOPNOTSUPP);
6631 }
6632
6633 sock_downcalls_t sock_udp_downcalls = {
6634 udp_activate, /* sd_activate */
6635 udp_accept, /* sd_accept */
6636 udp_bind, /* sd_bind */
6637 udp_listen, /* sd_listen */
6638 udp_connect, /* sd_connect */
6639 udp_getpeername, /* sd_getpeername */
6640 udp_getsockname, /* sd_getsockname */
6641 udp_getsockopt, /* sd_getsockopt */
6642 udp_setsockopt, /* sd_setsockopt */
6643 udp_send, /* sd_send */
6644 NULL, /* sd_send_uio */
6645 NULL, /* sd_recv_uio */
6646 NULL, /* sd_poll */
6647 udp_shutdown, /* sd_shutdown */
6648 udp_clr_flowctrl, /* sd_setflowctrl */
6649 udp_ioctl, /* sd_ioctl */
6650 udp_close /* sd_close */
6651 };
6652