xref: /freebsd/sys/netinet6/udp6_usrreq.c (revision aa79fe245de7616cda41b69a296a5ce209c95c45)
1 /*-
2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the project nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	$KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $
30  *	$KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $
31  */
32 
33 /*-
34  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
35  *	The Regents of the University of California.
36  * All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 4. Neither the name of the University nor the names of its contributors
47  *    may be used to endorse or promote products derived from this software
48  *    without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  *
62  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
63  */
64 
65 #include <sys/cdefs.h>
66 __FBSDID("$FreeBSD$");
67 
68 #include "opt_inet.h"
69 #include "opt_inet6.h"
70 #include "opt_ipsec.h"
71 
72 #include <sys/param.h>
73 #include <sys/jail.h>
74 #include <sys/kernel.h>
75 #include <sys/lock.h>
76 #include <sys/mbuf.h>
77 #include <sys/priv.h>
78 #include <sys/proc.h>
79 #include <sys/protosw.h>
80 #include <sys/signalvar.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/sx.h>
84 #include <sys/sysctl.h>
85 #include <sys/syslog.h>
86 #include <sys/systm.h>
87 #include <sys/vimage.h>
88 
89 #include <net/if.h>
90 #include <net/if_types.h>
91 #include <net/route.h>
92 
93 #include <netinet/in.h>
94 #include <netinet/in_pcb.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/in_var.h>
97 #include <netinet/ip.h>
98 #include <netinet/ip_icmp.h>
99 #include <netinet/ip6.h>
100 #include <netinet/icmp_var.h>
101 #include <netinet/icmp6.h>
102 #include <netinet/ip_var.h>
103 #include <netinet/udp.h>
104 #include <netinet/udp_var.h>
105 #include <netinet/vinet.h>
106 
107 #include <netinet6/ip6protosw.h>
108 #include <netinet6/ip6_var.h>
109 #include <netinet6/in6_pcb.h>
110 #include <netinet6/udp6_var.h>
111 #include <netinet6/scope6_var.h>
112 #include <netinet6/vinet6.h>
113 
114 #ifdef IPSEC
115 #include <netipsec/ipsec.h>
116 #include <netipsec/ipsec6.h>
117 #endif /* IPSEC */
118 
119 #include <security/mac/mac_framework.h>
120 
121 /*
122  * UDP protocol implementation.
123  * Per RFC 768, August, 1980.
124  */
125 
/*
 * Protocol switch table.  In this file it is indexed through
 * ip_protox[IPPROTO_UDP] to reach the IPv4 UDP user-request handlers when
 * a socket is operating on IPv4(-mapped) addresses.
 */
126 extern struct protosw	inetsw[];
/* Forward declaration; the definition appears later in this file. */
127 static void		udp6_detach(struct socket *so);
128 
129 static void
130 udp6_append(struct inpcb *inp, struct mbuf *n, int off,
131     struct sockaddr_in6 *fromsa)
132 {
133 	INIT_VNET_INET(inp->inp_vnet);
134 	struct socket *so;
135 	struct mbuf *opts;
136 
137 	INP_LOCK_ASSERT(inp);
138 
139 #ifdef IPSEC
140 	/* Check AH/ESP integrity. */
141 	if (ipsec6_in_reject(n, inp)) {
142 		INIT_VNET_IPSEC(inp->inp_vnet);
143 		m_freem(n);
144 		V_ipsec6stat.in_polvio++;
145 		return;
146 	}
147 #endif /* IPSEC */
148 #ifdef MAC
149 	if (mac_inpcb_check_deliver(inp, n) != 0) {
150 		m_freem(n);
151 		return;
152 	}
153 #endif
154 	opts = NULL;
155 	if (inp->inp_flags & INP_CONTROLOPTS ||
156 	    inp->inp_socket->so_options & SO_TIMESTAMP)
157 		ip6_savecontrol(inp, n, &opts);
158 	m_adj(n, off + sizeof(struct udphdr));
159 
160 	so = inp->inp_socket;
161 	SOCKBUF_LOCK(&so->so_rcv);
162 	if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n,
163 	    opts) == 0) {
164 		SOCKBUF_UNLOCK(&so->so_rcv);
165 		m_freem(n);
166 		if (opts)
167 			m_freem(opts);
168 		UDPSTAT_INC(udps_fullsock);
169 	} else
170 		sorwakeup_locked(so);
171 }
172 
/*
 * Input routine for UDP over IPv6.
 *
 * *mp is the received packet and *offp the offset of the UDP header within
 * it; 'proto' is not used by this function.  The UDP header is validated
 * (non-zero destination port, length consistent with the IPv6 payload
 * length, non-zero and correct checksum) and the datagram is then delivered
 * either to every matching pcb (multicast destination) or to the single pcb
 * returned by in6_pcblookup_hash() (otherwise).  Delivery goes through
 * udp6_append() unless the pcb's udpcb has a u_tun_func hook installed, in
 * which case the hook receives the packet instead.
 *
 * Every return path yields IPPROTO_DONE.
 */
173 int
174 udp6_input(struct mbuf **mp, int *offp, int proto)
175 {
176 	INIT_VNET_INET(curvnet);
177 	INIT_VNET_INET6(curvnet);
178 	struct mbuf *m = *mp;
179 	struct ifnet *ifp;
180 	struct ip6_hdr *ip6;
181 	struct udphdr *uh;
182 	struct inpcb *inp;
183 	struct udpcb *up;
184 	int off = *offp;
185 	int plen, ulen;
186 	struct sockaddr_in6 fromsa;
187 
188 	ifp = m->m_pkthdr.rcvif;
189 	ip6 = mtod(m, struct ip6_hdr *);
190 
191 	if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
192 		/* XXX send icmp6 host/port unreach? */
193 		m_freem(m);
194 		return (IPPROTO_DONE);
195 	}
196 
	/*
	 * Obtain 'uh', a pointer to the UDP header located 'off' bytes into
	 * the packet.  NOTE(review): both macro variants presumably make the
	 * header safely accessible first -- confirm against their definitions.
	 */
197 #ifndef PULLDOWN_TEST
198 	IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE);
199 	ip6 = mtod(m, struct ip6_hdr *);
200 	uh = (struct udphdr *)((caddr_t)ip6 + off);
201 #else
202 	IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh));
203 	if (!uh)
204 		return (IPPROTO_DONE);
205 #endif
206 
207 	UDPSTAT_INC(udps_ipackets);
208 
209 	/*
210 	 * Destination port of 0 is illegal, based on RFC768.
211 	 */
212 	if (uh->uh_dport == 0)
213 		goto badunlocked;
214 
	/*
	 * The length in the UDP header must equal the IPv6 payload length
	 * minus whatever precedes the UDP header inside that payload.
	 */
215 	plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
216 	ulen = ntohs((u_short)uh->uh_ulen);
217 
218 	if (plen != ulen) {
219 		UDPSTAT_INC(udps_badlen);
220 		goto badunlocked;
221 	}
222 
223 	/*
224 	 * Checksum extended UDP header and data.
225 	 */
	/* A zero checksum field is rejected and counted in udps_nosum. */
226 	if (uh->uh_sum == 0) {
227 		UDPSTAT_INC(udps_nosum);
228 		goto badunlocked;
229 	}
230 	if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) {
231 		UDPSTAT_INC(udps_badsum);
232 		goto badunlocked;
233 	}
234 
235 	/*
236 	 * Construct sockaddr format source address.
237 	 */
238 	init_sin6(&fromsa, m);
239 	fromsa.sin6_port = uh->uh_sport;
240 
	/* The pcbinfo read lock is held while pcbs are being searched. */
241 	INP_INFO_RLOCK(&V_udbinfo);
242 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
243 		struct inpcb *last;
244 		struct ip6_moptions *imo;
245 
246 		/*
247 		 * In the event that laddr should be set to the link-local
248 		 * address (this happens in RIPng), the multicast address
249 		 * specified in the received packet will not match laddr.  To
250 		 * handle this situation, matching is relaxed if the
251 		 * receiving interface is the same as one specified in the
252 		 * socket and if the destination multicast address matches
253 		 * one of the multicast groups specified in the socket.
254 		 */
255 
256 		/*
257 		 * KAME note: traditionally we dropped udpiphdr from mbuf
258 		 * here.  We need udphdr for IPsec processing so we do that
259 		 * later.
260 		 */
261 		last = NULL;
262 		LIST_FOREACH(inp, &V_udb, inp_list) {
263 			if ((inp->inp_vflag & INP_IPV6) == 0)
264 				continue;
265 			if (inp->inp_lport != uh->uh_dport)
266 				continue;
267 			if (inp->inp_fport != 0 &&
268 			    inp->inp_fport != uh->uh_sport)
269 				continue;
270 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
271 				if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
272 							&ip6->ip6_dst))
273 					continue;
274 			}
275 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
276 				if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
277 							&ip6->ip6_src) ||
278 				    inp->inp_fport != uh->uh_sport)
279 					continue;
280 			}
281 
282 			/*
283 			 * Handle socket delivery policy for any-source
284 			 * and source-specific multicast. [RFC3678]
285 			 */
286 			imo = inp->in6p_moptions;
287 			if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
288 				struct sockaddr_in6	 mcaddr;
289 				int			 blocked;
290 
291 				INP_RLOCK(inp);
292 
293 				bzero(&mcaddr, sizeof(struct sockaddr_in6));
294 				mcaddr.sin6_len = sizeof(struct sockaddr_in6);
295 				mcaddr.sin6_family = AF_INET6;
296 				mcaddr.sin6_addr = ip6->ip6_dst;
297 
298 				blocked = im6o_mc_filter(imo, ifp,
299 					(struct sockaddr *)&mcaddr,
300 					(struct sockaddr *)&fromsa);
301 				if (blocked != MCAST_PASS) {
302 					if (blocked == MCAST_NOTGMEMBER)
303 						IP6STAT_INC(ip6s_notmember);
304 					if (blocked == MCAST_NOTSMEMBER ||
305 					    blocked == MCAST_MUTED)
306 						UDPSTAT_INC(udps_filtermcast);
307 					INP_RUNLOCK(inp); /* XXX */
308 					continue;
309 				}
310 
311 				INP_RUNLOCK(inp);
312 			}
			/*
			 * An earlier pcb already matched: hand it a private
			 * copy of the packet.  The original mbuf is kept for
			 * whichever pcb turns out to be the final match.  If
			 * the copy cannot be made, that earlier pcb simply
			 * does not receive the datagram.
			 */
313 			if (last != NULL) {
314 				struct mbuf *n;
315 
316 				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
317 					INP_RLOCK(last);
318 					up = intoudpcb(last);
319 					if (up->u_tun_func == NULL) {
320 						udp6_append(last, n, off, &fromsa);
321 					} else {
322 						/*
323 						 * Engage the tunneling
324 						 * protocol we will have to
325 						 * leave the info_lock up,
326 						 * since we are hunting
327 						 * through multiple UDP's.
328 						 *
329 						 */
330 						(*up->u_tun_func)(n, off, last);
331 					}
332 					INP_RUNLOCK(last);
333 				}
334 			}
335 			last = inp;
336 			/*
337 			 * Don't look for additional matches if this one does
338 			 * not have either the SO_REUSEPORT or SO_REUSEADDR
339 			 * socket options set.  This heuristic avoids
340 			 * searching through all pcbs in the common case of a
341 			 * non-shared port.  It assumes that an application
342 			 * will never clear these options after setting them.
343 			 */
344 			if ((last->inp_socket->so_options &
345 			     (SO_REUSEPORT|SO_REUSEADDR)) == 0)
346 				break;
347 		}
348 
349 		if (last == NULL) {
350 			/*
351 			 * No matching pcb found; discard datagram.  (No need
352 			 * to send an ICMP Port Unreachable for a broadcast
353 			 * or multicast datagram.)
354 			 */
355 			UDPSTAT_INC(udps_noport);
356 			UDPSTAT_INC(udps_noportmcast);
357 			goto badheadlocked;
358 		}
		/*
		 * Deliver the original mbuf to the final match.  The pcb is
		 * read-locked before the pcbinfo lock is released.
		 */
359 		INP_RLOCK(last);
360 		INP_INFO_RUNLOCK(&V_udbinfo);
361 		up = intoudpcb(last);
362 		if (up->u_tun_func == NULL) {
363 			udp6_append(last, m, off, &fromsa);
364 		} else {
365 			/*
366 			 * Engage the tunneling protocol.
367 			 */
368 			(*up->u_tun_func)(m, off, last);
369 		}
370 		INP_RUNLOCK(last);
371 		return (IPPROTO_DONE);
372 	}
373 	/*
374 	 * Locate pcb for datagram.
375 	 */
376 	inp = in6_pcblookup_hash(&V_udbinfo, &ip6->ip6_src, uh->uh_sport,
377 	    &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
378 	if (inp == NULL) {
379 		if (udp_log_in_vain) {
380 			char ip6bufs[INET6_ADDRSTRLEN];
381 			char ip6bufd[INET6_ADDRSTRLEN];
382 
383 			log(LOG_INFO,
384 			    "Connection attempt to UDP [%s]:%d from [%s]:%d\n",
385 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
386 			    ntohs(uh->uh_dport),
387 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
388 			    ntohs(uh->uh_sport));
389 		}
390 		UDPSTAT_INC(udps_noport);
391 		if (m->m_flags & M_MCAST) {
392 			printf("UDP6: M_MCAST is set in a unicast packet.\n");
393 			UDPSTAT_INC(udps_noportmcast);
394 			goto badheadlocked;
395 		}
396 		INP_INFO_RUNLOCK(&V_udbinfo);
		/*
		 * No listener: drop silently when udp_blackhole is set or the
		 * rate limiter refuses; otherwise pass the packet to
		 * icmp6_error() for a "port unreachable" reply.
		 */
397 		if (V_udp_blackhole)
398 			goto badunlocked;
399 		if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0)
400 			goto badunlocked;
401 		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
402 		return (IPPROTO_DONE);
403 	}
404 	INP_RLOCK(inp);
405 	INP_INFO_RUNLOCK(&V_udbinfo);
406 	up = intoudpcb(inp);
407 	if (up->u_tun_func == NULL) {
408 		udp6_append(inp, m, off, &fromsa);
409 	} else {
410 		/*
411 		 * Engage the tunneling protocol.
412 		 */
413 
414 		(*up->u_tun_func)(m, off, inp);
415 	}
416 	INP_RUNLOCK(inp);
417 	return (IPPROTO_DONE);
418 
	/* Error exit taken while the pcbinfo read lock is still held. */
419 badheadlocked:
420 	INP_INFO_RUNLOCK(&V_udbinfo);
	/* Error exit taken with no locks held: free the packet and stop. */
421 badunlocked:
422 	if (m)
423 		m_freem(m);
424 	return (IPPROTO_DONE);
425 }
426 
427 void
428 udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
429 {
430 	INIT_VNET_INET(curvnet);
431 	struct udphdr uh;
432 	struct ip6_hdr *ip6;
433 	struct mbuf *m;
434 	int off = 0;
435 	struct ip6ctlparam *ip6cp = NULL;
436 	const struct sockaddr_in6 *sa6_src = NULL;
437 	void *cmdarg;
438 	struct inpcb *(*notify)(struct inpcb *, int) = udp_notify;
439 	struct udp_portonly {
440 		u_int16_t uh_sport;
441 		u_int16_t uh_dport;
442 	} *uhp;
443 
444 	if (sa->sa_family != AF_INET6 ||
445 	    sa->sa_len != sizeof(struct sockaddr_in6))
446 		return;
447 
448 	if ((unsigned)cmd >= PRC_NCMDS)
449 		return;
450 	if (PRC_IS_REDIRECT(cmd))
451 		notify = in6_rtchange, d = NULL;
452 	else if (cmd == PRC_HOSTDEAD)
453 		d = NULL;
454 	else if (inet6ctlerrmap[cmd] == 0)
455 		return;
456 
457 	/* if the parameter is from icmp6, decode it. */
458 	if (d != NULL) {
459 		ip6cp = (struct ip6ctlparam *)d;
460 		m = ip6cp->ip6c_m;
461 		ip6 = ip6cp->ip6c_ip6;
462 		off = ip6cp->ip6c_off;
463 		cmdarg = ip6cp->ip6c_cmdarg;
464 		sa6_src = ip6cp->ip6c_src;
465 	} else {
466 		m = NULL;
467 		ip6 = NULL;
468 		cmdarg = NULL;
469 		sa6_src = &sa6_any;
470 	}
471 
472 	if (ip6) {
473 		/*
474 		 * XXX: We assume that when IPV6 is non NULL,
475 		 * M and OFF are valid.
476 		 */
477 
478 		/* Check if we can safely examine src and dst ports. */
479 		if (m->m_pkthdr.len < off + sizeof(*uhp))
480 			return;
481 
482 		bzero(&uh, sizeof(uh));
483 		m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh);
484 
485 		(void) in6_pcbnotify(&V_udbinfo, sa, uh.uh_dport,
486 		    (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd,
487 		    cmdarg, notify);
488 	} else
489 		(void) in6_pcbnotify(&V_udbinfo, sa, 0,
490 		    (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
491 }
492 
493 static int
494 udp6_getcred(SYSCTL_HANDLER_ARGS)
495 {
496 	INIT_VNET_INET(curvnet);
497 	INIT_VNET_INET6(curvnet);
498 	struct xucred xuc;
499 	struct sockaddr_in6 addrs[2];
500 	struct inpcb *inp;
501 	int error;
502 
503 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
504 	if (error)
505 		return (error);
506 
507 	if (req->newlen != sizeof(addrs))
508 		return (EINVAL);
509 	if (req->oldlen != sizeof(struct xucred))
510 		return (EINVAL);
511 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
512 	if (error)
513 		return (error);
514 	if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
515 	    (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
516 		return (error);
517 	}
518 	INP_INFO_RLOCK(&V_udbinfo);
519 	inp = in6_pcblookup_hash(&V_udbinfo, &addrs[1].sin6_addr,
520 	    addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 1,
521 	    NULL);
522 	if (inp != NULL) {
523 		INP_RLOCK(inp);
524 		INP_INFO_RUNLOCK(&V_udbinfo);
525 		if (inp->inp_socket == NULL)
526 			error = ENOENT;
527 		if (error == 0)
528 			error = cr_canseesocket(req->td->td_ucred,
529 			    inp->inp_socket);
530 		if (error == 0)
531 			cru2x(inp->inp_cred, &xuc);
532 		INP_RUNLOCK(inp);
533 	} else {
534 		INP_INFO_RUNLOCK(&V_udbinfo);
535 		error = ENOENT;
536 	}
537 	if (error == 0)
538 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
539 	return (error);
540 }
541 
/*
 * Register udp6_getcred() as the handler of the opaque, read/write
 * "getcred" node under _net_inet6_udp6.
 */
542 SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
543     0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
544 
545 static int
546 udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
547     struct mbuf *control, struct thread *td)
548 {
549 	INIT_VNET_INET(curvnet);
550 	INIT_VNET_INET6(curvnet);
551 	u_int32_t ulen = m->m_pkthdr.len;
552 	u_int32_t plen = sizeof(struct udphdr) + ulen;
553 	struct ip6_hdr *ip6;
554 	struct udphdr *udp6;
555 	struct in6_addr *laddr, *faddr;
556 	struct sockaddr_in6 *sin6 = NULL;
557 	struct ifnet *oifp = NULL;
558 	int scope_ambiguous = 0;
559 	u_short fport;
560 	int error = 0;
561 	struct ip6_pktopts *optp, opt;
562 	int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
563 	int flags;
564 	struct sockaddr_in6 tmp;
565 
566 	INP_WLOCK_ASSERT(inp);
567 
568 	if (addr6) {
569 		/* addr6 has been validated in udp6_send(). */
570 		sin6 = (struct sockaddr_in6 *)addr6;
571 
572 		/* protect *sin6 from overwrites */
573 		tmp = *sin6;
574 		sin6 = &tmp;
575 
576 		/*
577 		 * Application should provide a proper zone ID or the use of
578 		 * default zone IDs should be enabled.  Unfortunately, some
579 		 * applications do not behave as it should, so we need a
580 		 * workaround.  Even if an appropriate ID is not determined,
581 		 * we'll see if we can determine the outgoing interface.  If we
582 		 * can, determine the zone ID based on the interface below.
583 		 */
584 		if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
585 			scope_ambiguous = 1;
586 		if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
587 			return (error);
588 	}
589 
590 	if (control) {
591 		if ((error = ip6_setpktopts(control, &opt,
592 		    inp->in6p_outputopts, td->td_ucred, IPPROTO_UDP)) != 0)
593 			goto release;
594 		optp = &opt;
595 	} else
596 		optp = inp->in6p_outputopts;
597 
598 	if (sin6) {
599 		faddr = &sin6->sin6_addr;
600 
601 		/*
602 		 * IPv4 version of udp_output calls in_pcbconnect in this case,
603 		 * which needs splnet and affects performance.
604 		 * Since we saw no essential reason for calling in_pcbconnect,
605 		 * we get rid of such kind of logic, and call in6_selectsrc
606 		 * and in6_pcbsetport in order to fill in the local address
607 		 * and the local port.
608 		 */
609 		if (sin6->sin6_port == 0) {
610 			error = EADDRNOTAVAIL;
611 			goto release;
612 		}
613 
614 		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
615 			/* how about ::ffff:0.0.0.0 case? */
616 			error = EISCONN;
617 			goto release;
618 		}
619 
620 		fport = sin6->sin6_port; /* allow 0 port */
621 
622 		if (IN6_IS_ADDR_V4MAPPED(faddr)) {
623 			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
624 				/*
625 				 * I believe we should explicitly discard the
626 				 * packet when mapped addresses are disabled,
627 				 * rather than send the packet as an IPv6 one.
628 				 * If we chose the latter approach, the packet
629 				 * might be sent out on the wire based on the
630 				 * default route, the situation which we'd
631 				 * probably want to avoid.
632 				 * (20010421 jinmei@kame.net)
633 				 */
634 				error = EINVAL;
635 				goto release;
636 			}
637 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
638 			    !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
639 				/*
640 				 * when remote addr is an IPv4-mapped address,
641 				 * local addr should not be an IPv6 address,
642 				 * since you cannot determine how to map IPv6
643 				 * source address to IPv4.
644 				 */
645 				error = EINVAL;
646 				goto release;
647 			}
648 
649 			af = AF_INET;
650 		}
651 
652 		if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
653 			laddr = in6_selectsrc(sin6, optp, inp, NULL,
654 			    td->td_ucred, &oifp, &error);
655 			if (oifp && scope_ambiguous &&
656 			    (error = in6_setscope(&sin6->sin6_addr,
657 			    oifp, NULL))) {
658 				goto release;
659 			}
660 		} else
661 			laddr = &inp->in6p_laddr;	/* XXX */
662 		if (laddr == NULL) {
663 			if (error == 0)
664 				error = EADDRNOTAVAIL;
665 			goto release;
666 		}
667 		if (inp->inp_lport == 0 &&
668 		    (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0)
669 			goto release;
670 	} else {
671 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
672 			error = ENOTCONN;
673 			goto release;
674 		}
675 		if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) {
676 			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
677 				/*
678 				 * XXX: this case would happen when the
679 				 * application sets the V6ONLY flag after
680 				 * connecting the foreign address.
681 				 * Such applications should be fixed,
682 				 * so we bark here.
683 				 */
684 				log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
685 				    "option was set for a connected socket\n");
686 				error = EINVAL;
687 				goto release;
688 			} else
689 				af = AF_INET;
690 		}
691 		laddr = &inp->in6p_laddr;
692 		faddr = &inp->in6p_faddr;
693 		fport = inp->inp_fport;
694 	}
695 
696 	if (af == AF_INET)
697 		hlen = sizeof(struct ip);
698 
699 	/*
700 	 * Calculate data length and get a mbuf
701 	 * for UDP and IP6 headers.
702 	 */
703 	M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT);
704 	if (m == 0) {
705 		error = ENOBUFS;
706 		goto release;
707 	}
708 
709 	/*
710 	 * Stuff checksum and output datagram.
711 	 */
712 	udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
713 	udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
714 	udp6->uh_dport = fport;
715 	if (plen <= 0xffff)
716 		udp6->uh_ulen = htons((u_short)plen);
717 	else
718 		udp6->uh_ulen = 0;
719 	udp6->uh_sum = 0;
720 
721 	switch (af) {
722 	case AF_INET6:
723 		ip6 = mtod(m, struct ip6_hdr *);
724 		ip6->ip6_flow	= inp->inp_flow & IPV6_FLOWINFO_MASK;
725 		ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
726 		ip6->ip6_vfc	|= IPV6_VERSION;
727 #if 0				/* ip6_plen will be filled in ip6_output. */
728 		ip6->ip6_plen	= htons((u_short)plen);
729 #endif
730 		ip6->ip6_nxt	= IPPROTO_UDP;
731 		ip6->ip6_hlim	= in6_selecthlim(inp, NULL);
732 		ip6->ip6_src	= *laddr;
733 		ip6->ip6_dst	= *faddr;
734 
735 		if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP,
736 				sizeof(struct ip6_hdr), plen)) == 0) {
737 			udp6->uh_sum = 0xffff;
738 		}
739 
740 		flags = 0;
741 
742 		UDPSTAT_INC(udps_opackets);
743 		error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
744 		    NULL, inp);
745 		break;
746 	case AF_INET:
747 		error = EAFNOSUPPORT;
748 		goto release;
749 	}
750 	goto releaseopt;
751 
752 release:
753 	m_freem(m);
754 
755 releaseopt:
756 	if (control) {
757 		ip6_clearpktopts(&opt, -1);
758 		m_freem(control);
759 	}
760 	return (error);
761 }
762 
763 static void
764 udp6_abort(struct socket *so)
765 {
766 	INIT_VNET_INET(so->so_vnet);
767 	struct inpcb *inp;
768 
769 	inp = sotoinpcb(so);
770 	KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
771 
772 #ifdef INET
773 	if (inp->inp_vflag & INP_IPV4) {
774 		struct pr_usrreqs *pru;
775 
776 		pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
777 		(*pru->pru_abort)(so);
778 		return;
779 	}
780 #endif
781 
782 	INP_INFO_WLOCK(&V_udbinfo);
783 	INP_WLOCK(inp);
784 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
785 		in6_pcbdisconnect(inp);
786 		inp->in6p_laddr = in6addr_any;
787 		soisdisconnected(so);
788 	}
789 	INP_WUNLOCK(inp);
790 	INP_INFO_WUNLOCK(&V_udbinfo);
791 }
792 
793 static int
794 udp6_attach(struct socket *so, int proto, struct thread *td)
795 {
796 	INIT_VNET_INET(so->so_vnet);
797 	struct inpcb *inp;
798 	int error;
799 
800 	inp = sotoinpcb(so);
801 	KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
802 
803 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
804 		error = soreserve(so, udp_sendspace, udp_recvspace);
805 		if (error)
806 			return (error);
807 	}
808 	INP_INFO_WLOCK(&V_udbinfo);
809 	error = in_pcballoc(so, &V_udbinfo);
810 	if (error) {
811 		INP_INFO_WUNLOCK(&V_udbinfo);
812 		return (error);
813 	}
814 	inp = (struct inpcb *)so->so_pcb;
815 	inp->inp_vflag |= INP_IPV6;
816 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
817 		inp->inp_vflag |= INP_IPV4;
818 	inp->in6p_hops = -1;	/* use kernel default */
819 	inp->in6p_cksum = -1;	/* just to be sure */
820 	/*
821 	 * XXX: ugly!!
822 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
823 	 * because the socket may be bound to an IPv6 wildcard address,
824 	 * which may match an IPv4-mapped IPv6 address.
825 	 */
826 	inp->inp_ip_ttl = V_ip_defttl;
827 
828 	error = udp_newudpcb(inp);
829 	if (error) {
830 		in_pcbdetach(inp);
831 		in_pcbfree(inp);
832 		INP_INFO_WUNLOCK(&V_udbinfo);
833 		return (error);
834 	}
835 	INP_WUNLOCK(inp);
836 	INP_INFO_WUNLOCK(&V_udbinfo);
837 	return (0);
838 }
839 
840 static int
841 udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
842 {
843 	INIT_VNET_INET(so->so_vnet);
844 	struct inpcb *inp;
845 	int error;
846 
847 	inp = sotoinpcb(so);
848 	KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
849 
850 	INP_INFO_WLOCK(&V_udbinfo);
851 	INP_WLOCK(inp);
852 	inp->inp_vflag &= ~INP_IPV4;
853 	inp->inp_vflag |= INP_IPV6;
854 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
855 		struct sockaddr_in6 *sin6_p;
856 
857 		sin6_p = (struct sockaddr_in6 *)nam;
858 
859 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr))
860 			inp->inp_vflag |= INP_IPV4;
861 		else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
862 			struct sockaddr_in sin;
863 
864 			in6_sin6_2_sin(&sin, sin6_p);
865 			inp->inp_vflag |= INP_IPV4;
866 			inp->inp_vflag &= ~INP_IPV6;
867 			error = in_pcbbind(inp, (struct sockaddr *)&sin,
868 			    td->td_ucred);
869 			goto out;
870 		}
871 	}
872 
873 	error = in6_pcbbind(inp, nam, td->td_ucred);
874 out:
875 	INP_WUNLOCK(inp);
876 	INP_INFO_WUNLOCK(&V_udbinfo);
877 	return (error);
878 }
879 
880 static void
881 udp6_close(struct socket *so)
882 {
883 	INIT_VNET_INET(so->so_vnet);
884 	struct inpcb *inp;
885 
886 	inp = sotoinpcb(so);
887 	KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
888 
889 #ifdef INET
890 	if (inp->inp_vflag & INP_IPV4) {
891 		struct pr_usrreqs *pru;
892 
893 		pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
894 		(*pru->pru_disconnect)(so);
895 		return;
896 	}
897 #endif
898 	INP_INFO_WLOCK(&V_udbinfo);
899 	INP_WLOCK(inp);
900 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
901 		in6_pcbdisconnect(inp);
902 		inp->in6p_laddr = in6addr_any;
903 		soisdisconnected(so);
904 	}
905 	INP_WUNLOCK(inp);
906 	INP_INFO_WUNLOCK(&V_udbinfo);
907 }
908 
909 static int
910 udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
911 {
912 	INIT_VNET_INET(so->so_vnet);
913 	struct inpcb *inp;
914 	struct sockaddr_in6 *sin6;
915 	int error;
916 
917 	inp = sotoinpcb(so);
918 	sin6 = (struct sockaddr_in6 *)nam;
919 	KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
920 
921 	INP_INFO_WLOCK(&V_udbinfo);
922 	INP_WLOCK(inp);
923 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
924 	    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
925 		struct sockaddr_in sin;
926 
927 		if (inp->inp_faddr.s_addr != INADDR_ANY) {
928 			error = EISCONN;
929 			goto out;
930 		}
931 		in6_sin6_2_sin(&sin, sin6);
932 		error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
933 		if (error != 0)
934 			goto out;
935 		error = in_pcbconnect(inp, (struct sockaddr *)&sin,
936 		    td->td_ucred);
937 		if (error == 0) {
938 			inp->inp_vflag |= INP_IPV4;
939 			inp->inp_vflag &= ~INP_IPV6;
940 			soisconnected(so);
941 		}
942 		goto out;
943 	}
944 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
945 		error = EISCONN;
946 		goto out;
947 	}
948 	error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
949 	if (error != 0)
950 		goto out;
951 	error = in6_pcbconnect(inp, nam, td->td_ucred);
952 	if (error == 0) {
953 		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
954 			/* should be non mapped addr */
955 			inp->inp_vflag &= ~INP_IPV4;
956 			inp->inp_vflag |= INP_IPV6;
957 		}
958 		soisconnected(so);
959 	}
960 out:
961 	INP_WUNLOCK(inp);
962 	INP_INFO_WUNLOCK(&V_udbinfo);
963 	return (error);
964 }
965 
966 static void
967 udp6_detach(struct socket *so)
968 {
969 	INIT_VNET_INET(so->so_vnet);
970 	struct inpcb *inp;
971 	struct udpcb *up;
972 
973 	inp = sotoinpcb(so);
974 	KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
975 
976 	INP_INFO_WLOCK(&V_udbinfo);
977 	INP_WLOCK(inp);
978 	up = intoudpcb(inp);
979 	KASSERT(up != NULL, ("%s: up == NULL", __func__));
980 	in_pcbdetach(inp);
981 	in_pcbfree(inp);
982 	INP_INFO_WUNLOCK(&V_udbinfo);
983 	udp_discardcb(up);
984 }
985 
986 static int
987 udp6_disconnect(struct socket *so)
988 {
989 	INIT_VNET_INET(so->so_vnet);
990 	struct inpcb *inp;
991 	int error;
992 
993 	inp = sotoinpcb(so);
994 	KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
995 
996 	INP_INFO_WLOCK(&V_udbinfo);
997 	INP_WLOCK(inp);
998 
999 #ifdef INET
1000 	if (inp->inp_vflag & INP_IPV4) {
1001 		struct pr_usrreqs *pru;
1002 
1003 		pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
1004 		error = (*pru->pru_disconnect)(so);
1005 		goto out;
1006 	}
1007 #endif
1008 
1009 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
1010 		error = ENOTCONN;
1011 		goto out;
1012 	}
1013 
1014 	in6_pcbdisconnect(inp);
1015 	inp->in6p_laddr = in6addr_any;
1016 	SOCK_LOCK(so);
1017 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
1018 	SOCK_UNLOCK(so);
1019 out:
1020 	INP_WUNLOCK(inp);
1021 	INP_INFO_WUNLOCK(&V_udbinfo);
1022 	return (0);
1023 }
1024 
1025 static int
1026 udp6_send(struct socket *so, int flags, struct mbuf *m,
1027     struct sockaddr *addr, struct mbuf *control, struct thread *td)
1028 {
1029 	INIT_VNET_INET(so->so_vnet);
1030 	struct inpcb *inp;
1031 	int error = 0;
1032 
1033 	inp = sotoinpcb(so);
1034 	KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
1035 
1036 	INP_INFO_WLOCK(&V_udbinfo);
1037 	INP_WLOCK(inp);
1038 	if (addr) {
1039 		if (addr->sa_len != sizeof(struct sockaddr_in6)) {
1040 			error = EINVAL;
1041 			goto bad;
1042 		}
1043 		if (addr->sa_family != AF_INET6) {
1044 			error = EAFNOSUPPORT;
1045 			goto bad;
1046 		}
1047 	}
1048 
1049 #ifdef INET
1050 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
1051 		int hasv4addr;
1052 		struct sockaddr_in6 *sin6 = 0;
1053 
1054 		if (addr == 0)
1055 			hasv4addr = (inp->inp_vflag & INP_IPV4);
1056 		else {
1057 			sin6 = (struct sockaddr_in6 *)addr;
1058 			hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
1059 			    ? 1 : 0;
1060 		}
1061 		if (hasv4addr) {
1062 			struct pr_usrreqs *pru;
1063 
1064 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
1065 			    !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
1066 				/*
1067 				 * When remote addr is IPv4-mapped address,
1068 				 * local addr should not be an IPv6 address;
1069 				 * since you cannot determine how to map IPv6
1070 				 * source address to IPv4.
1071 				 */
1072 				error = EINVAL;
1073 				goto out;
1074 			}
1075 
1076 			/*
1077 			 * XXXRW: We release UDP-layer locks before calling
1078 			 * udp_send() in order to avoid recursion.  However,
1079 			 * this does mean there is a short window where inp's
1080 			 * fields are unstable.  Could this lead to a
1081 			 * potential race in which the factors causing us to
1082 			 * select the UDPv4 output routine are invalidated?
1083 			 */
1084 			INP_WUNLOCK(inp);
1085 			INP_INFO_WUNLOCK(&V_udbinfo);
1086 			if (sin6)
1087 				in6_sin6_2_sin_in_sock(addr);
1088 			pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
1089 			/* addr will just be freed in sendit(). */
1090 			return ((*pru->pru_send)(so, flags, m, addr, control,
1091 			    td));
1092 		}
1093 	}
1094 #endif
1095 #ifdef MAC
1096 	mac_inpcb_create_mbuf(inp, m);
1097 #endif
1098 	error = udp6_output(inp, m, addr, control, td);
1099 out:
1100 	INP_WUNLOCK(inp);
1101 	INP_INFO_WUNLOCK(&V_udbinfo);
1102 	return (error);
1103 
1104 bad:
1105 	INP_WUNLOCK(inp);
1106 	INP_INFO_WUNLOCK(&V_udbinfo);
1107 	m_freem(m);
1108 	return (error);
1109 }
1110 
1111 struct pr_usrreqs udp6_usrreqs = {
1112 	.pru_abort =		udp6_abort,
1113 	.pru_attach =		udp6_attach,
1114 	.pru_bind =		udp6_bind,
1115 	.pru_connect =		udp6_connect,
1116 	.pru_control =		in6_control,
1117 	.pru_detach =		udp6_detach,
1118 	.pru_disconnect =	udp6_disconnect,
1119 	.pru_peeraddr =		in6_mapped_peeraddr,
1120 	.pru_send =		udp6_send,
1121 	.pru_shutdown =		udp_shutdown,
1122 	.pru_sockaddr =		in6_mapped_sockaddr,
1123 	.pru_soreceive =	soreceive_dgram,
1124 	.pru_sosend =		sosend_dgram,
1125 	.pru_sosetlabel =	in_pcbsosetlabel,
1126 	.pru_close =		udp6_close
1127 };
1128