xref: /freebsd/sys/netinet/in_pcb.c (revision b601c69bdbe8755d26570261d7fd4c02ee4eff74)
1 /*
2  * Copyright (c) 1982, 1986, 1991, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_ipsec.h"
38 #include "opt_inet6.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/domain.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/proc.h>
49 #include <sys/jail.h>
50 #include <sys/kernel.h>
51 #include <sys/sysctl.h>
52 
53 #include <machine/limits.h>
54 
55 #include <vm/vm_zone.h>
56 
57 #include <net/if.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 
61 #include <netinet/in.h>
62 #include <netinet/in_pcb.h>
63 #include <netinet/in_var.h>
64 #include <netinet/ip_var.h>
65 #ifdef INET6
66 #include <netinet/ip6.h>
67 #include <netinet6/ip6_var.h>
68 #endif /* INET6 */
69 
70 #include "faith.h"
71 
72 #ifdef IPSEC
73 #include <netinet6/ipsec.h>
74 #include <netkey/key.h>
75 #endif /* IPSEC */
76 
/*
 * File-scope in_addr with no initializer, so it has static storage and
 * starts out all zero.  Nothing in this file reads it; presumably it is
 * exported for other modules -- TODO confirm against the users.
 */
struct	in_addr zeroin_addr;

/*
 * Forward declaration: in_pcbnotify() installs in_rtchange as its
 * notify callback before the function body appears further down.
 */
static void	in_rtchange __P((struct inpcb *, int));

/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 *
 * There are three (first, last) pairs.  in_pcbbind() picks the "hi"
 * pair for INP_HIGHPORT sockets, the "low" pair for INP_LOWPORT sockets
 * and the plain pair otherwise.  A pair whose first value is larger than
 * its last (the default for the low range) is searched counting down.
 * All six values can be changed at run time through the
 * net.inet.ip.portrange sysctl nodes declared below.
 */
int	ipport_lowfirstauto  = IPPORT_RESERVED - 1;	/* 1023 */
int	ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
int	ipport_firstauto = IPPORT_RESERVED;		/* 1024 */
int	ipport_lastauto  = IPPORT_USERRESERVED;		/* 5000 */
int	ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int	ipport_hilastauto  = IPPORT_HILASTAUTO;		/* 65535 */
91 
92 #define RANGECHK(var, min, max) \
93 	if ((var) < (min)) { (var) = (min); } \
94 	else if ((var) > (max)) { (var) = (max); }
95 
96 static int
97 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
98 {
99 	int error = sysctl_handle_int(oidp,
100 		oidp->oid_arg1, oidp->oid_arg2, req);
101 	if (!error) {
102 		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
103 		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
104 		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
105 		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
106 		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
107 		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
108 	}
109 	return error;
110 }
111 
112 #undef RANGECHK
113 
/*
 * net.inet.ip.portrange: read/write integer knobs for the automatic
 * port ranges.  Each leaf is bound to one ipport_* variable and all of
 * them go through sysctl_net_ipport_check(), which clamps every range
 * variable after a successful update.  The description strings of the
 * leaves are empty.
 */
SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
128 
129 /*
130  * in_pcb.c: manage the Protocol Control Blocks.
131  *
132  * NOTE: It is assumed that most of these functions will be called at
133  * splnet(). XXX - There are, unfortunately, a few exceptions to this
134  * rule that should be fixed.
135  */
136 
137 /*
138  * Allocate a PCB and associate it with the socket.
139  */
140 int
141 in_pcballoc(so, pcbinfo, p)
142 	struct socket *so;
143 	struct inpcbinfo *pcbinfo;
144 	struct proc *p;
145 {
146 	register struct inpcb *inp;
147 
148 	inp = zalloci(pcbinfo->ipi_zone);
149 	if (inp == NULL)
150 		return (ENOBUFS);
151 	bzero((caddr_t)inp, sizeof(*inp));
152 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
153 	inp->inp_pcbinfo = pcbinfo;
154 	inp->inp_socket = so;
155 #if defined(INET6)
156 	if (ip6_mapped_addr_on)
157 		inp->inp_flags &= ~IN6P_BINDV6ONLY;
158 	else
159 		inp->inp_flags |= IN6P_BINDV6ONLY;
160 #endif
161 	LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
162 	pcbinfo->ipi_count++;
163 	so->so_pcb = (caddr_t)inp;
164 	return (0);
165 }
166 
167 int
168 in_pcbbind(inp, nam, p)
169 	register struct inpcb *inp;
170 	struct sockaddr *nam;
171 	struct proc *p;
172 {
173 	register struct socket *so = inp->inp_socket;
174 	unsigned short *lastport;
175 	struct sockaddr_in *sin;
176 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
177 	u_short lport = 0;
178 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
179 	int error, prison = 0;
180 
181 	if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
182 		return (EADDRNOTAVAIL);
183 	if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
184 		return (EINVAL);
185 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
186 		wild = 1;
187 	if (nam) {
188 		sin = (struct sockaddr_in *)nam;
189 		if (nam->sa_len != sizeof (*sin))
190 			return (EINVAL);
191 #ifdef notdef
192 		/*
193 		 * We should check the family, but old programs
194 		 * incorrectly fail to initialize it.
195 		 */
196 		if (sin->sin_family != AF_INET)
197 			return (EAFNOSUPPORT);
198 #endif
199 		if (prison_ip(p, 0, &sin->sin_addr.s_addr))
200 			return(EINVAL);
201 		lport = sin->sin_port;
202 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
203 			/*
204 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
205 			 * allow complete duplication of binding if
206 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
207 			 * and a multicast address is bound on both
208 			 * new and duplicated sockets.
209 			 */
210 			if (so->so_options & SO_REUSEADDR)
211 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
212 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
213 			sin->sin_port = 0;		/* yech... */
214 			if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
215 				return (EADDRNOTAVAIL);
216 		}
217 		if (lport) {
218 			struct inpcb *t;
219 
220 			/* GROSS */
221 			if (ntohs(lport) < IPPORT_RESERVED && p &&
222 			    suser_xxx(0, p, PRISON_ROOT))
223 				return (EACCES);
224 			if (p && p->p_prison)
225 				prison = 1;
226 			if (so->so_cred->cr_uid != 0 &&
227 			    !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
228 				t = in_pcblookup_local(inp->inp_pcbinfo,
229 				    sin->sin_addr, lport,
230 				    prison ? 0 :  INPLOOKUP_WILDCARD);
231 				if (t &&
232 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
233 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
234 				     (t->inp_socket->so_options &
235 					 SO_REUSEPORT) == 0) &&
236 				    (so->so_cred->cr_uid !=
237 				     t->inp_socket->so_cred->cr_uid)) {
238 #if defined(INET6)
239 					if ((inp->inp_flags &
240 					     IN6P_BINDV6ONLY) != 0 ||
241 					    ntohl(sin->sin_addr.s_addr) !=
242 					    INADDR_ANY ||
243 					    ntohl(t->inp_laddr.s_addr) !=
244 					    INADDR_ANY ||
245 					    INP_SOCKAF(so) ==
246 					    INP_SOCKAF(t->inp_socket))
247 #endif /* defined(INET6) */
248 					return (EADDRINUSE);
249 				}
250 			}
251 			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
252 			    lport, prison ? 0 : wild);
253 			if (t &&
254 			    (reuseport & t->inp_socket->so_options) == 0) {
255 #if defined(INET6)
256 				if ((inp->inp_flags & IN6P_BINDV6ONLY) != 0 ||
257 				    ntohl(sin->sin_addr.s_addr) !=
258 				    INADDR_ANY ||
259 				    ntohl(t->inp_laddr.s_addr) !=
260 				    INADDR_ANY ||
261 				    INP_SOCKAF(so) ==
262 				    INP_SOCKAF(t->inp_socket))
263 #endif /* defined(INET6) */
264 				return (EADDRINUSE);
265 			}
266 		}
267 		inp->inp_laddr = sin->sin_addr;
268 	}
269 	if (lport == 0) {
270 		ushort first, last;
271 		int count;
272 
273 		if (prison_ip(p, 0, &inp->inp_laddr.s_addr ))
274 			return (EINVAL);
275 		inp->inp_flags |= INP_ANONPORT;
276 
277 		if (inp->inp_flags & INP_HIGHPORT) {
278 			first = ipport_hifirstauto;	/* sysctl */
279 			last  = ipport_hilastauto;
280 			lastport = &pcbinfo->lasthi;
281 		} else if (inp->inp_flags & INP_LOWPORT) {
282 			if (p && (error = suser_xxx(0, p, PRISON_ROOT)))
283 				return error;
284 			first = ipport_lowfirstauto;	/* 1023 */
285 			last  = ipport_lowlastauto;	/* 600 */
286 			lastport = &pcbinfo->lastlow;
287 		} else {
288 			first = ipport_firstauto;	/* sysctl */
289 			last  = ipport_lastauto;
290 			lastport = &pcbinfo->lastport;
291 		}
292 		/*
293 		 * Simple check to ensure all ports are not used up causing
294 		 * a deadlock here.
295 		 *
296 		 * We split the two cases (up and down) so that the direction
297 		 * is not being tested on each round of the loop.
298 		 */
299 		if (first > last) {
300 			/*
301 			 * counting down
302 			 */
303 			count = first - last;
304 
305 			do {
306 				if (count-- < 0) {	/* completely used? */
307 					/*
308 					 * Undo any address bind that may have
309 					 * occurred above.
310 					 */
311 					inp->inp_laddr.s_addr = INADDR_ANY;
312 					return (EAGAIN);
313 				}
314 				--*lastport;
315 				if (*lastport > first || *lastport < last)
316 					*lastport = first;
317 				lport = htons(*lastport);
318 			} while (in_pcblookup_local(pcbinfo,
319 				 inp->inp_laddr, lport, wild));
320 		} else {
321 			/*
322 			 * counting up
323 			 */
324 			count = last - first;
325 
326 			do {
327 				if (count-- < 0) {	/* completely used? */
328 					/*
329 					 * Undo any address bind that may have
330 					 * occurred above.
331 					 */
332 					inp->inp_laddr.s_addr = INADDR_ANY;
333 					return (EAGAIN);
334 				}
335 				++*lastport;
336 				if (*lastport < first || *lastport > last)
337 					*lastport = first;
338 				lport = htons(*lastport);
339 			} while (in_pcblookup_local(pcbinfo,
340 				 inp->inp_laddr, lport, wild));
341 		}
342 	}
343 	inp->inp_lport = lport;
344 	if (in_pcbinshash(inp) != 0) {
345 		inp->inp_laddr.s_addr = INADDR_ANY;
346 		inp->inp_lport = 0;
347 		return (EAGAIN);
348 	}
349 	return (0);
350 }
351 
352 /*
353  *   Transform old in_pcbconnect() into an inner subroutine for new
354  *   in_pcbconnect(): Do some validity-checking on the remote
355  *   address (in mbuf 'nam') and then determine local host address
356  *   (i.e., which interface) to use to access that remote host.
357  *
358  *   This preserves definition of in_pcbconnect(), while supporting a
359  *   slightly different version for T/TCP.  (This is more than
360  *   a bit of a kludge, but cleaning up the internal interfaces would
361  *   have forced minor changes in every protocol).
362  */
363 
364 int
365 in_pcbladdr(inp, nam, plocal_sin)
366 	register struct inpcb *inp;
367 	struct sockaddr *nam;
368 	struct sockaddr_in **plocal_sin;
369 {
370 	struct in_ifaddr *ia;
371 	register struct sockaddr_in *sin = (struct sockaddr_in *)nam;
372 
373 	if (nam->sa_len != sizeof (*sin))
374 		return (EINVAL);
375 	if (sin->sin_family != AF_INET)
376 		return (EAFNOSUPPORT);
377 	if (sin->sin_port == 0)
378 		return (EADDRNOTAVAIL);
379 	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
380 		/*
381 		 * If the destination address is INADDR_ANY,
382 		 * use the primary local address.
383 		 * If the supplied address is INADDR_BROADCAST,
384 		 * and the primary interface supports broadcast,
385 		 * choose the broadcast address for that interface.
386 		 */
387 #define	satosin(sa)	((struct sockaddr_in *)(sa))
388 #define sintosa(sin)	((struct sockaddr *)(sin))
389 #define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
390 		if (sin->sin_addr.s_addr == INADDR_ANY)
391 		    sin->sin_addr = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr;
392 		else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
393 		  (in_ifaddrhead.tqh_first->ia_ifp->if_flags & IFF_BROADCAST))
394 		    sin->sin_addr = satosin(&in_ifaddrhead.tqh_first->ia_broadaddr)->sin_addr;
395 	}
396 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
397 		register struct route *ro;
398 
399 		ia = (struct in_ifaddr *)0;
400 		/*
401 		 * If route is known or can be allocated now,
402 		 * our src addr is taken from the i/f, else punt.
403 		 */
404 		ro = &inp->inp_route;
405 		if (ro->ro_rt &&
406 		    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
407 			sin->sin_addr.s_addr ||
408 		    inp->inp_socket->so_options & SO_DONTROUTE)) {
409 			RTFREE(ro->ro_rt);
410 			ro->ro_rt = (struct rtentry *)0;
411 		}
412 		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
413 		    (ro->ro_rt == (struct rtentry *)0 ||
414 		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
415 			/* No route yet, so try to acquire one */
416 			ro->ro_dst.sa_family = AF_INET;
417 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
418 			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
419 				sin->sin_addr;
420 			rtalloc(ro);
421 		}
422 		/*
423 		 * If we found a route, use the address
424 		 * corresponding to the outgoing interface
425 		 * unless it is the loopback (in case a route
426 		 * to our address on another net goes to loopback).
427 		 */
428 		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
429 			ia = ifatoia(ro->ro_rt->rt_ifa);
430 		if (ia == 0) {
431 			u_short fport = sin->sin_port;
432 
433 			sin->sin_port = 0;
434 			ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
435 			if (ia == 0)
436 				ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
437 			sin->sin_port = fport;
438 			if (ia == 0)
439 				ia = in_ifaddrhead.tqh_first;
440 			if (ia == 0)
441 				return (EADDRNOTAVAIL);
442 		}
443 		/*
444 		 * If the destination address is multicast and an outgoing
445 		 * interface has been set as a multicast option, use the
446 		 * address of that interface as our source address.
447 		 */
448 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
449 		    inp->inp_moptions != NULL) {
450 			struct ip_moptions *imo;
451 			struct ifnet *ifp;
452 
453 			imo = inp->inp_moptions;
454 			if (imo->imo_multicast_ifp != NULL) {
455 				ifp = imo->imo_multicast_ifp;
456 				for (ia = in_ifaddrhead.tqh_first; ia;
457 				     ia = ia->ia_link.tqe_next)
458 					if (ia->ia_ifp == ifp)
459 						break;
460 				if (ia == 0)
461 					return (EADDRNOTAVAIL);
462 			}
463 		}
464 	/*
465 	 * Don't do pcblookup call here; return interface in plocal_sin
466 	 * and exit to caller, that will do the lookup.
467 	 */
468 		*plocal_sin = &ia->ia_addr;
469 
470 	}
471 	return(0);
472 }
473 
474 /*
475  * Outer subroutine:
476  * Connect from a socket to a specified address.
477  * Both address and port must be specified in argument sin.
478  * If don't have a local address for this socket yet,
479  * then pick one.
480  */
481 int
482 in_pcbconnect(inp, nam, p)
483 	register struct inpcb *inp;
484 	struct sockaddr *nam;
485 	struct proc *p;
486 {
487 	struct sockaddr_in *ifaddr;
488 	register struct sockaddr_in *sin = (struct sockaddr_in *)nam;
489 	int error;
490 
491 	/*
492 	 *   Call inner routine, to assign local interface address.
493 	 */
494 	if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
495 		return(error);
496 
497 	if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
498 	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
499 	    inp->inp_lport, 0, NULL) != NULL) {
500 		return (EADDRINUSE);
501 	}
502 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
503 		if (inp->inp_lport == 0) {
504 			error = in_pcbbind(inp, (struct sockaddr *)0, p);
505 			if (error)
506 			    return (error);
507 		}
508 		inp->inp_laddr = ifaddr->sin_addr;
509 	}
510 	inp->inp_faddr = sin->sin_addr;
511 	inp->inp_fport = sin->sin_port;
512 	in_pcbrehash(inp);
513 	return (0);
514 }
515 
516 void
517 in_pcbdisconnect(inp)
518 	struct inpcb *inp;
519 {
520 
521 	inp->inp_faddr.s_addr = INADDR_ANY;
522 	inp->inp_fport = 0;
523 	in_pcbrehash(inp);
524 	if (inp->inp_socket->so_state & SS_NOFDREF)
525 		in_pcbdetach(inp);
526 }
527 
528 void
529 in_pcbdetach(inp)
530 	struct inpcb *inp;
531 {
532 	struct socket *so = inp->inp_socket;
533 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
534 
535 #ifdef IPSEC
536 	ipsec4_delete_pcbpolicy(inp);
537 #endif /*IPSEC*/
538 	inp->inp_gencnt = ++ipi->ipi_gencnt;
539 	in_pcbremlists(inp);
540 	so->so_pcb = 0;
541 	sofree(so);
542 	if (inp->inp_options)
543 		(void)m_free(inp->inp_options);
544 	if (inp->inp_route.ro_rt)
545 		rtfree(inp->inp_route.ro_rt);
546 	ip_freemoptions(inp->inp_moptions);
547 	inp->inp_vflag = 0;
548 	zfreei(ipi->ipi_zone, inp);
549 }
550 
551 /*
552  * The calling convention of in_setsockaddr() and in_setpeeraddr() was
553  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
554  * in struct pr_usrreqs, so that protocols can just reference then directly
555  * without the need for a wrapper function.  The socket must have a valid
556  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
557  * except through a kernel programming error, so it is acceptable to panic
558  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
559  * because there actually /is/ a programming error somewhere... XXX)
560  */
561 int
562 in_setsockaddr(so, nam)
563 	struct socket *so;
564 	struct sockaddr **nam;
565 {
566 	int s;
567 	register struct inpcb *inp;
568 	register struct sockaddr_in *sin;
569 
570 	/*
571 	 * Do the malloc first in case it blocks.
572 	 */
573 	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
574 	bzero(sin, sizeof *sin);
575 	sin->sin_family = AF_INET;
576 	sin->sin_len = sizeof(*sin);
577 
578 	s = splnet();
579 	inp = sotoinpcb(so);
580 	if (!inp) {
581 		splx(s);
582 		free(sin, M_SONAME);
583 		return ECONNRESET;
584 	}
585 	sin->sin_port = inp->inp_lport;
586 	sin->sin_addr = inp->inp_laddr;
587 	splx(s);
588 
589 	*nam = (struct sockaddr *)sin;
590 	return 0;
591 }
592 
593 int
594 in_setpeeraddr(so, nam)
595 	struct socket *so;
596 	struct sockaddr **nam;
597 {
598 	int s;
599 	struct inpcb *inp;
600 	register struct sockaddr_in *sin;
601 
602 	/*
603 	 * Do the malloc first in case it blocks.
604 	 */
605 	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
606 	bzero(sin, sizeof (*sin));
607 	sin->sin_family = AF_INET;
608 	sin->sin_len = sizeof(*sin);
609 
610 	s = splnet();
611 	inp = sotoinpcb(so);
612 	if (!inp) {
613 		splx(s);
614 		free(sin, M_SONAME);
615 		return ECONNRESET;
616 	}
617 	sin->sin_port = inp->inp_fport;
618 	sin->sin_addr = inp->inp_faddr;
619 	splx(s);
620 
621 	*nam = (struct sockaddr *)sin;
622 	return 0;
623 }
624 
625 /*
626  * Pass some notification to all connections of a protocol
627  * associated with address dst.  The local address and/or port numbers
628  * may be specified to limit the search.  The "usual action" will be
629  * taken, depending on the ctlinput cmd.  The caller must filter any
630  * cmds that are uninteresting (e.g., no error in the map).
631  * Call the protocol specific routine (if any) to report
632  * any errors for each matching socket.
633  */
634 void
635 in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify)
636 	struct inpcbhead *head;
637 	struct sockaddr *dst;
638 	u_int fport_arg, lport_arg;
639 	struct in_addr laddr;
640 	int cmd;
641 	void (*notify) __P((struct inpcb *, int));
642 {
643 	register struct inpcb *inp, *oinp;
644 	struct in_addr faddr;
645 	u_short fport = fport_arg, lport = lport_arg;
646 	int errno, s;
647 
648 	if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET)
649 		return;
650 	faddr = ((struct sockaddr_in *)dst)->sin_addr;
651 	if (faddr.s_addr == INADDR_ANY)
652 		return;
653 
654 	/*
655 	 * Redirects go to all references to the destination,
656 	 * and use in_rtchange to invalidate the route cache.
657 	 * Dead host indications: notify all references to the destination.
658 	 * Otherwise, if we have knowledge of the local port and address,
659 	 * deliver only to that socket.
660 	 */
661 	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
662 		fport = 0;
663 		lport = 0;
664 		laddr.s_addr = 0;
665 		if (cmd != PRC_HOSTDEAD)
666 			notify = in_rtchange;
667 	}
668 	errno = inetctlerrmap[cmd];
669 	s = splnet();
670 	for (inp = head->lh_first; inp != NULL;) {
671 #ifdef INET6
672 		if ((inp->inp_vflag & INP_IPV4) == 0) {
673 			inp = LIST_NEXT(inp, inp_list);
674 			continue;
675 		}
676 #endif
677 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
678 		    inp->inp_socket == 0 ||
679 		    (lport && inp->inp_lport != lport) ||
680 		    (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) ||
681 		    (fport && inp->inp_fport != fport)) {
682 			inp = inp->inp_list.le_next;
683 			continue;
684 		}
685 		oinp = inp;
686 		inp = inp->inp_list.le_next;
687 		if (notify)
688 			(*notify)(oinp, errno);
689 	}
690 	splx(s);
691 }
692 
693 /*
694  * Check for alternatives when higher level complains
695  * about service problems.  For now, invalidate cached
696  * routing information.  If the route was created dynamically
697  * (by a redirect), time to try a default gateway again.
698  */
699 void
700 in_losing(inp)
701 	struct inpcb *inp;
702 {
703 	register struct rtentry *rt;
704 	struct rt_addrinfo info;
705 
706 	if ((rt = inp->inp_route.ro_rt)) {
707 		inp->inp_route.ro_rt = 0;
708 		bzero((caddr_t)&info, sizeof(info));
709 		info.rti_info[RTAX_DST] =
710 			(struct sockaddr *)&inp->inp_route.ro_dst;
711 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
712 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
713 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
714 		if (rt->rt_flags & RTF_DYNAMIC)
715 			(void) rtrequest(RTM_DELETE, rt_key(rt),
716 				rt->rt_gateway, rt_mask(rt), rt->rt_flags,
717 				(struct rtentry **)0);
718 		else
719 		/*
720 		 * A new route can be allocated
721 		 * the next time output is attempted.
722 		 */
723 			rtfree(rt);
724 	}
725 }
726 
727 /*
728  * After a routing change, flush old routing
729  * and allocate a (hopefully) better one.
730  */
731 static void
732 in_rtchange(inp, errno)
733 	register struct inpcb *inp;
734 	int errno;
735 {
736 	if (inp->inp_route.ro_rt) {
737 		rtfree(inp->inp_route.ro_rt);
738 		inp->inp_route.ro_rt = 0;
739 		/*
740 		 * A new route can be allocated the next time
741 		 * output is attempted.
742 		 */
743 	}
744 }
745 
746 /*
747  * Lookup a PCB based on the local address and port.
748  */
749 struct inpcb *
750 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
751 	struct inpcbinfo *pcbinfo;
752 	struct in_addr laddr;
753 	u_int lport_arg;
754 	int wild_okay;
755 {
756 	register struct inpcb *inp;
757 	int matchwild = 3, wildcard;
758 	u_short lport = lport_arg;
759 
760 	if (!wild_okay) {
761 		struct inpcbhead *head;
762 		/*
763 		 * Look for an unconnected (wildcard foreign addr) PCB that
764 		 * matches the local address and port we're looking for.
765 		 */
766 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
767 		for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
768 #ifdef INET6
769 			if ((inp->inp_vflag & INP_IPV4) == 0)
770 				continue;
771 #endif
772 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
773 			    inp->inp_laddr.s_addr == laddr.s_addr &&
774 			    inp->inp_lport == lport) {
775 				/*
776 				 * Found.
777 				 */
778 				return (inp);
779 			}
780 		}
781 		/*
782 		 * Not found.
783 		 */
784 		return (NULL);
785 	} else {
786 		struct inpcbporthead *porthash;
787 		struct inpcbport *phd;
788 		struct inpcb *match = NULL;
789 		/*
790 		 * Best fit PCB lookup.
791 		 *
792 		 * First see if this local port is in use by looking on the
793 		 * port hash list.
794 		 */
795 		porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
796 		    pcbinfo->porthashmask)];
797 		for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) {
798 			if (phd->phd_port == lport)
799 				break;
800 		}
801 		if (phd != NULL) {
802 			/*
803 			 * Port is in use by one or more PCBs. Look for best
804 			 * fit.
805 			 */
806 			for (inp = phd->phd_pcblist.lh_first; inp != NULL;
807 			    inp = inp->inp_portlist.le_next) {
808 				wildcard = 0;
809 #ifdef INET6
810 				if ((inp->inp_vflag & INP_IPV4) == 0)
811 					continue;
812 #endif
813 				if (inp->inp_faddr.s_addr != INADDR_ANY)
814 					wildcard++;
815 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
816 					if (laddr.s_addr == INADDR_ANY)
817 						wildcard++;
818 					else if (inp->inp_laddr.s_addr != laddr.s_addr)
819 						continue;
820 				} else {
821 					if (laddr.s_addr != INADDR_ANY)
822 						wildcard++;
823 				}
824 				if (wildcard < matchwild) {
825 					match = inp;
826 					matchwild = wildcard;
827 					if (matchwild == 0) {
828 						break;
829 					}
830 				}
831 			}
832 		}
833 		return (match);
834 	}
835 }
836 
837 /*
838  * Lookup PCB in hash list.
839  */
840 struct inpcb *
841 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard,
842 		  ifp)
843 	struct inpcbinfo *pcbinfo;
844 	struct in_addr faddr, laddr;
845 	u_int fport_arg, lport_arg;
846 	int wildcard;
847 	struct ifnet *ifp;
848 {
849 	struct inpcbhead *head;
850 	register struct inpcb *inp;
851 	u_short fport = fport_arg, lport = lport_arg;
852 
853 	/*
854 	 * First look for an exact match.
855 	 */
856 	head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
857 	for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
858 #ifdef INET6
859 		if ((inp->inp_vflag & INP_IPV4) == 0)
860 			continue;
861 #endif
862 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
863 		    inp->inp_laddr.s_addr == laddr.s_addr &&
864 		    inp->inp_fport == fport &&
865 		    inp->inp_lport == lport) {
866 			/*
867 			 * Found.
868 			 */
869 			return (inp);
870 		}
871 	}
872 	if (wildcard) {
873 		struct inpcb *local_wild = NULL;
874 #if defined(INET6)
875 		struct inpcb *local_wild_mapped = NULL;
876 #endif /* defined(INET6) */
877 
878 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
879 		for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
880 #ifdef INET6
881 			if ((inp->inp_vflag & INP_IPV4) == 0)
882 				continue;
883 #endif
884 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
885 			    inp->inp_lport == lport) {
886 #if defined(NFAITH) && NFAITH > 0
887 				if (ifp && ifp->if_type == IFT_FAITH &&
888 				    (inp->inp_flags & INP_FAITH) == 0)
889 					continue;
890 #endif
891 				if (inp->inp_laddr.s_addr == laddr.s_addr)
892 					return (inp);
893 				else if (inp->inp_laddr.s_addr == INADDR_ANY) {
894 #if defined(INET6)
895 					if (INP_CHECK_SOCKAF(inp->inp_socket,
896 							     AF_INET6))
897 						local_wild_mapped = inp;
898 					else
899 #endif /* defined(INET6) */
900 					local_wild = inp;
901 				}
902 			}
903 		}
904 #if defined(INET6)
905 		if (local_wild == NULL)
906 			return (local_wild_mapped);
907 #endif /* defined(INET6) */
908 		return (local_wild);
909 	}
910 
911 	/*
912 	 * Not found.
913 	 */
914 	return (NULL);
915 }
916 
917 /*
918  * Insert PCB onto various hash lists.
919  */
920 int
921 in_pcbinshash(inp)
922 	struct inpcb *inp;
923 {
924 	struct inpcbhead *pcbhash;
925 	struct inpcbporthead *pcbporthash;
926 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
927 	struct inpcbport *phd;
928 	u_int32_t hashkey_faddr;
929 
930 #ifdef INET6
931 	if (inp->inp_vflag & INP_IPV6)
932 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
933 	else
934 #endif /* INET6 */
935 	hashkey_faddr = inp->inp_faddr.s_addr;
936 
937 	pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
938 		 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
939 
940 	pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
941 	    pcbinfo->porthashmask)];
942 
943 	/*
944 	 * Go through port list and look for a head for this lport.
945 	 */
946 	for (phd = pcbporthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) {
947 		if (phd->phd_port == inp->inp_lport)
948 			break;
949 	}
950 	/*
951 	 * If none exists, malloc one and tack it on.
952 	 */
953 	if (phd == NULL) {
954 		MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT);
955 		if (phd == NULL) {
956 			return (ENOBUFS); /* XXX */
957 		}
958 		phd->phd_port = inp->inp_lport;
959 		LIST_INIT(&phd->phd_pcblist);
960 		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
961 	}
962 	inp->inp_phd = phd;
963 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
964 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
965 	return (0);
966 }
967 
968 /*
969  * Move PCB to the proper hash bucket when { faddr, fport } have  been
970  * changed. NOTE: This does not handle the case of the lport changing (the
971  * hashed port list would have to be updated as well), so the lport must
972  * not change after in_pcbinshash() has been called.
973  */
974 void
975 in_pcbrehash(inp)
976 	struct inpcb *inp;
977 {
978 	struct inpcbhead *head;
979 	u_int32_t hashkey_faddr;
980 
981 #ifdef INET6
982 	if (inp->inp_vflag & INP_IPV6)
983 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
984 	else
985 #endif /* INET6 */
986 	hashkey_faddr = inp->inp_faddr.s_addr;
987 
988 	head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
989 		inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)];
990 
991 	LIST_REMOVE(inp, inp_hash);
992 	LIST_INSERT_HEAD(head, inp, inp_hash);
993 }
994 
995 /*
996  * Remove PCB from various lists.
997  */
998 void
999 in_pcbremlists(inp)
1000 	struct inpcb *inp;
1001 {
1002 	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1003 	if (inp->inp_lport) {
1004 		struct inpcbport *phd = inp->inp_phd;
1005 
1006 		LIST_REMOVE(inp, inp_hash);
1007 		LIST_REMOVE(inp, inp_portlist);
1008 		if (phd->phd_pcblist.lh_first == NULL) {
1009 			LIST_REMOVE(phd, phd_hash);
1010 			free(phd, M_PCB);
1011 		}
1012 	}
1013 	LIST_REMOVE(inp, inp_list);
1014 	inp->inp_pcbinfo->ipi_count--;
1015 }
1016 
1017 int
1018 prison_xinpcb(struct proc *p, struct inpcb *inp)
1019 {
1020 	if (!p->p_prison)
1021 		return (0);
1022 	if (ntohl(inp->inp_laddr.s_addr) == p->p_prison->pr_ip)
1023 		return (0);
1024 	return (1);
1025 }
1026