xref: /freebsd/sys/netinet/in_pcb.c (revision c807777a43ef2b59786fa8a1a35c1f154fd069e5)
1 /*
2  * Copyright (c) 1982, 1986, 1991, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_ipsec.h"
38 #include "opt_inet6.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/domain.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/proc.h>
49 #include <sys/jail.h>
50 #include <sys/kernel.h>
51 #include <sys/sysctl.h>
52 
53 #include <machine/limits.h>
54 
55 #include <vm/vm_zone.h>
56 
57 #include <net/if.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 
61 #include <netinet/in.h>
62 #include <netinet/in_pcb.h>
63 #include <netinet/in_var.h>
64 #include <netinet/ip_var.h>
65 #ifdef INET6
66 #include <netinet/ip6.h>
67 #include <netinet6/ip6_var.h>
68 #endif /* INET6 */
69 
70 #include "faith.h"
71 
72 #ifdef IPSEC
73 #include <netinet6/ipsec.h>
74 #include <netkey/key.h>
75 #include <netkey/key_debug.h>
76 #endif /* IPSEC */
77 
78 struct	in_addr zeroin_addr;
79 
80 static void	in_rtchange __P((struct inpcb *, int));
81 
82 /*
83  * These configure the range of local port addresses assigned to
84  * "unspecified" outgoing connections/packets/whatever.
85  */
86 int	ipport_lowfirstauto  = IPPORT_RESERVED - 1;	/* 1023 */
87 int	ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
88 int	ipport_firstauto = IPPORT_RESERVED;		/* 1024 */
89 int	ipport_lastauto  = IPPORT_USERRESERVED;		/* 5000 */
90 int	ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
91 int	ipport_hilastauto  = IPPORT_HILASTAUTO;		/* 65535 */
92 
93 #define RANGECHK(var, min, max) \
94 	if ((var) < (min)) { (var) = (min); } \
95 	else if ((var) > (max)) { (var) = (max); }
96 
97 static int
98 sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
99 {
100 	int error = sysctl_handle_int(oidp,
101 		oidp->oid_arg1, oidp->oid_arg2, req);
102 	if (!error) {
103 		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
104 		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
105 		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
106 		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
107 		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
108 		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
109 	}
110 	return error;
111 }
112 
113 #undef RANGECHK
114 
115 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
116 
117 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
118 	   &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
119 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
120 	   &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
121 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
122 	   &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
123 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
124 	   &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
125 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
126 	   &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
127 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
128 	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
129 
130 /*
131  * in_pcb.c: manage the Protocol Control Blocks.
132  *
133  * NOTE: It is assumed that most of these functions will be called at
134  * splnet(). XXX - There are, unfortunately, a few exceptions to this
135  * rule that should be fixed.
136  */
137 
138 /*
139  * Allocate a PCB and associate it with the socket.
140  */
141 int
142 in_pcballoc(so, pcbinfo, p)
143 	struct socket *so;
144 	struct inpcbinfo *pcbinfo;
145 	struct proc *p;
146 {
147 	register struct inpcb *inp;
148 
149 	inp = zalloci(pcbinfo->ipi_zone);
150 	if (inp == NULL)
151 		return (ENOBUFS);
152 	bzero((caddr_t)inp, sizeof(*inp));
153 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
154 	inp->inp_pcbinfo = pcbinfo;
155 	inp->inp_socket = so;
156 	LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
157 	pcbinfo->ipi_count++;
158 	so->so_pcb = (caddr_t)inp;
159 	return (0);
160 }
161 
162 int
163 in_pcbbind(inp, nam, p)
164 	register struct inpcb *inp;
165 	struct sockaddr *nam;
166 	struct proc *p;
167 {
168 	register struct socket *so = inp->inp_socket;
169 	unsigned short *lastport;
170 	struct sockaddr_in *sin;
171 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
172 	u_short lport = 0;
173 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
174 	int error, prison = 0;
175 
176 	if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
177 		return (EADDRNOTAVAIL);
178 	if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
179 		return (EINVAL);
180 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
181 		wild = 1;
182 	if (nam) {
183 		sin = (struct sockaddr_in *)nam;
184 		if (nam->sa_len != sizeof (*sin))
185 			return (EINVAL);
186 #ifdef notdef
187 		/*
188 		 * We should check the family, but old programs
189 		 * incorrectly fail to initialize it.
190 		 */
191 		if (sin->sin_family != AF_INET)
192 			return (EAFNOSUPPORT);
193 #endif
194 		if (prison_ip(p, 0, &sin->sin_addr.s_addr))
195 			return(EINVAL);
196 		lport = sin->sin_port;
197 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
198 			/*
199 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
200 			 * allow complete duplication of binding if
201 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
202 			 * and a multicast address is bound on both
203 			 * new and duplicated sockets.
204 			 */
205 			if (so->so_options & SO_REUSEADDR)
206 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
207 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
208 			sin->sin_port = 0;		/* yech... */
209 			if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
210 				return (EADDRNOTAVAIL);
211 		}
212 		if (lport) {
213 			struct inpcb *t;
214 
215 			/* GROSS */
216 			if (ntohs(lport) < IPPORT_RESERVED && p &&
217 			    suser_xxx(0, p, PRISON_ROOT))
218 				return (EACCES);
219 			if (p && p->p_prison)
220 				prison = 1;
221 			if (so->so_cred->cr_uid != 0 &&
222 			    !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
223 				t = in_pcblookup_local(inp->inp_pcbinfo,
224 				    sin->sin_addr, lport,
225 				    prison ? 0 :  INPLOOKUP_WILDCARD);
226 				if (t &&
227 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
228 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
229 				     (t->inp_socket->so_options &
230 					 SO_REUSEPORT) == 0) &&
231 				    (so->so_cred->cr_uid !=
232 				     t->inp_socket->so_cred->cr_uid)) {
233 #if defined(INET6)
234 					if (ip6_mapped_addr_on == 0 ||
235 					    ntohl(sin->sin_addr.s_addr) !=
236 					    INADDR_ANY ||
237 					    ntohl(t->inp_laddr.s_addr) !=
238 					    INADDR_ANY ||
239 					    INP_SOCKAF(so) ==
240 					    INP_SOCKAF(t->inp_socket))
241 #endif /* defined(INET6) */
242 					return (EADDRINUSE);
243 				}
244 			}
245 			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
246 			    lport, prison ? 0 : wild);
247 			if (t &&
248 			    (reuseport & t->inp_socket->so_options) == 0) {
249 #if defined(INET6)
250 				if (ip6_mapped_addr_on == 0 ||
251 				    ntohl(sin->sin_addr.s_addr) !=
252 				    INADDR_ANY ||
253 				    ntohl(t->inp_laddr.s_addr) !=
254 				    INADDR_ANY ||
255 				    INP_SOCKAF(so) ==
256 				    INP_SOCKAF(t->inp_socket))
257 #endif /* defined(INET6) */
258 				return (EADDRINUSE);
259 			}
260 		}
261 		inp->inp_laddr = sin->sin_addr;
262 	}
263 	if (lport == 0) {
264 		ushort first, last;
265 		int count;
266 
267 		if (prison_ip(p, 0, &inp->inp_laddr.s_addr ))
268 			return (EINVAL);
269 		inp->inp_flags |= INP_ANONPORT;
270 
271 		if (inp->inp_flags & INP_HIGHPORT) {
272 			first = ipport_hifirstauto;	/* sysctl */
273 			last  = ipport_hilastauto;
274 			lastport = &pcbinfo->lasthi;
275 		} else if (inp->inp_flags & INP_LOWPORT) {
276 			if (p && (error = suser_xxx(0, p, PRISON_ROOT)))
277 				return error;
278 			first = ipport_lowfirstauto;	/* 1023 */
279 			last  = ipport_lowlastauto;	/* 600 */
280 			lastport = &pcbinfo->lastlow;
281 		} else {
282 			first = ipport_firstauto;	/* sysctl */
283 			last  = ipport_lastauto;
284 			lastport = &pcbinfo->lastport;
285 		}
286 		/*
287 		 * Simple check to ensure all ports are not used up causing
288 		 * a deadlock here.
289 		 *
290 		 * We split the two cases (up and down) so that the direction
291 		 * is not being tested on each round of the loop.
292 		 */
293 		if (first > last) {
294 			/*
295 			 * counting down
296 			 */
297 			count = first - last;
298 
299 			do {
300 				if (count-- < 0) {	/* completely used? */
301 					/*
302 					 * Undo any address bind that may have
303 					 * occurred above.
304 					 */
305 					inp->inp_laddr.s_addr = INADDR_ANY;
306 					return (EAGAIN);
307 				}
308 				--*lastport;
309 				if (*lastport > first || *lastport < last)
310 					*lastport = first;
311 				lport = htons(*lastport);
312 			} while (in_pcblookup_local(pcbinfo,
313 				 inp->inp_laddr, lport, wild));
314 		} else {
315 			/*
316 			 * counting up
317 			 */
318 			count = last - first;
319 
320 			do {
321 				if (count-- < 0) {	/* completely used? */
322 					/*
323 					 * Undo any address bind that may have
324 					 * occurred above.
325 					 */
326 					inp->inp_laddr.s_addr = INADDR_ANY;
327 					return (EAGAIN);
328 				}
329 				++*lastport;
330 				if (*lastport < first || *lastport > last)
331 					*lastport = first;
332 				lport = htons(*lastport);
333 			} while (in_pcblookup_local(pcbinfo,
334 				 inp->inp_laddr, lport, wild));
335 		}
336 	}
337 	inp->inp_lport = lport;
338 	if (in_pcbinshash(inp) != 0) {
339 		inp->inp_laddr.s_addr = INADDR_ANY;
340 		inp->inp_lport = 0;
341 		return (EAGAIN);
342 	}
343 	return (0);
344 }
345 
346 /*
347  *   Transform old in_pcbconnect() into an inner subroutine for new
348  *   in_pcbconnect(): Do some validity-checking on the remote
349  *   address (in mbuf 'nam') and then determine local host address
350  *   (i.e., which interface) to use to access that remote host.
351  *
352  *   This preserves definition of in_pcbconnect(), while supporting a
353  *   slightly different version for T/TCP.  (This is more than
354  *   a bit of a kludge, but cleaning up the internal interfaces would
355  *   have forced minor changes in every protocol).
356  */
357 
358 int
359 in_pcbladdr(inp, nam, plocal_sin)
360 	register struct inpcb *inp;
361 	struct sockaddr *nam;
362 	struct sockaddr_in **plocal_sin;
363 {
364 	struct in_ifaddr *ia;
365 	register struct sockaddr_in *sin = (struct sockaddr_in *)nam;
366 
367 	if (nam->sa_len != sizeof (*sin))
368 		return (EINVAL);
369 	if (sin->sin_family != AF_INET)
370 		return (EAFNOSUPPORT);
371 	if (sin->sin_port == 0)
372 		return (EADDRNOTAVAIL);
373 	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
374 		/*
375 		 * If the destination address is INADDR_ANY,
376 		 * use the primary local address.
377 		 * If the supplied address is INADDR_BROADCAST,
378 		 * and the primary interface supports broadcast,
379 		 * choose the broadcast address for that interface.
380 		 */
381 #define	satosin(sa)	((struct sockaddr_in *)(sa))
382 #define sintosa(sin)	((struct sockaddr *)(sin))
383 #define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
384 		if (sin->sin_addr.s_addr == INADDR_ANY)
385 		    sin->sin_addr = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr;
386 		else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
387 		  (in_ifaddrhead.tqh_first->ia_ifp->if_flags & IFF_BROADCAST))
388 		    sin->sin_addr = satosin(&in_ifaddrhead.tqh_first->ia_broadaddr)->sin_addr;
389 	}
390 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
391 		register struct route *ro;
392 
393 		ia = (struct in_ifaddr *)0;
394 		/*
395 		 * If route is known or can be allocated now,
396 		 * our src addr is taken from the i/f, else punt.
397 		 */
398 		ro = &inp->inp_route;
399 		if (ro->ro_rt &&
400 		    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
401 			sin->sin_addr.s_addr ||
402 		    inp->inp_socket->so_options & SO_DONTROUTE)) {
403 			RTFREE(ro->ro_rt);
404 			ro->ro_rt = (struct rtentry *)0;
405 		}
406 		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
407 		    (ro->ro_rt == (struct rtentry *)0 ||
408 		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
409 			/* No route yet, so try to acquire one */
410 			ro->ro_dst.sa_family = AF_INET;
411 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
412 			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
413 				sin->sin_addr;
414 			rtalloc(ro);
415 		}
416 		/*
417 		 * If we found a route, use the address
418 		 * corresponding to the outgoing interface
419 		 * unless it is the loopback (in case a route
420 		 * to our address on another net goes to loopback).
421 		 */
422 		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
423 			ia = ifatoia(ro->ro_rt->rt_ifa);
424 		if (ia == 0) {
425 			u_short fport = sin->sin_port;
426 
427 			sin->sin_port = 0;
428 			ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
429 			if (ia == 0)
430 				ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
431 			sin->sin_port = fport;
432 			if (ia == 0)
433 				ia = in_ifaddrhead.tqh_first;
434 			if (ia == 0)
435 				return (EADDRNOTAVAIL);
436 		}
437 		/*
438 		 * If the destination address is multicast and an outgoing
439 		 * interface has been set as a multicast option, use the
440 		 * address of that interface as our source address.
441 		 */
442 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
443 		    inp->inp_moptions != NULL) {
444 			struct ip_moptions *imo;
445 			struct ifnet *ifp;
446 
447 			imo = inp->inp_moptions;
448 			if (imo->imo_multicast_ifp != NULL) {
449 				ifp = imo->imo_multicast_ifp;
450 				for (ia = in_ifaddrhead.tqh_first; ia;
451 				     ia = ia->ia_link.tqe_next)
452 					if (ia->ia_ifp == ifp)
453 						break;
454 				if (ia == 0)
455 					return (EADDRNOTAVAIL);
456 			}
457 		}
458 	/*
459 	 * Don't do pcblookup call here; return interface in plocal_sin
460 	 * and exit to caller, that will do the lookup.
461 	 */
462 		*plocal_sin = &ia->ia_addr;
463 
464 	}
465 	return(0);
466 }
467 
468 /*
469  * Outer subroutine:
470  * Connect from a socket to a specified address.
471  * Both address and port must be specified in argument sin.
472  * If don't have a local address for this socket yet,
473  * then pick one.
474  */
475 int
476 in_pcbconnect(inp, nam, p)
477 	register struct inpcb *inp;
478 	struct sockaddr *nam;
479 	struct proc *p;
480 {
481 	struct sockaddr_in *ifaddr;
482 	register struct sockaddr_in *sin = (struct sockaddr_in *)nam;
483 	int error;
484 
485 	/*
486 	 *   Call inner routine, to assign local interface address.
487 	 */
488 	if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
489 		return(error);
490 
491 	if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
492 	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
493 	    inp->inp_lport, 0, NULL) != NULL) {
494 		return (EADDRINUSE);
495 	}
496 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
497 		if (inp->inp_lport == 0) {
498 			error = in_pcbbind(inp, (struct sockaddr *)0, p);
499 			if (error)
500 			    return (error);
501 		}
502 		inp->inp_laddr = ifaddr->sin_addr;
503 	}
504 	inp->inp_faddr = sin->sin_addr;
505 	inp->inp_fport = sin->sin_port;
506 	in_pcbrehash(inp);
507 	return (0);
508 }
509 
510 void
511 in_pcbdisconnect(inp)
512 	struct inpcb *inp;
513 {
514 
515 	inp->inp_faddr.s_addr = INADDR_ANY;
516 	inp->inp_fport = 0;
517 	in_pcbrehash(inp);
518 	if (inp->inp_socket->so_state & SS_NOFDREF)
519 		in_pcbdetach(inp);
520 }
521 
522 void
523 in_pcbdetach(inp)
524 	struct inpcb *inp;
525 {
526 	struct socket *so = inp->inp_socket;
527 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
528 
529 #ifdef IPSEC
530 	ipsec4_delete_pcbpolicy(inp);
531 #endif /*IPSEC*/
532 	inp->inp_gencnt = ++ipi->ipi_gencnt;
533 	in_pcbremlists(inp);
534 	so->so_pcb = 0;
535 	sofree(so);
536 	if (inp->inp_options)
537 		(void)m_free(inp->inp_options);
538 	if (inp->inp_route.ro_rt)
539 		rtfree(inp->inp_route.ro_rt);
540 	ip_freemoptions(inp->inp_moptions);
541 	inp->inp_vflag = 0;
542 	zfreei(ipi->ipi_zone, inp);
543 }
544 
545 /*
546  * The calling convention of in_setsockaddr() and in_setpeeraddr() was
547  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
548  * in struct pr_usrreqs, so that protocols can just reference then directly
549  * without the need for a wrapper function.  The socket must have a valid
550  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
551  * except through a kernel programming error, so it is acceptable to panic
552  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
553  * because there actually /is/ a programming error somewhere... XXX)
554  */
555 int
556 in_setsockaddr(so, nam)
557 	struct socket *so;
558 	struct sockaddr **nam;
559 {
560 	int s;
561 	register struct inpcb *inp;
562 	register struct sockaddr_in *sin;
563 
564 	/*
565 	 * Do the malloc first in case it blocks.
566 	 */
567 	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
568 	bzero(sin, sizeof *sin);
569 	sin->sin_family = AF_INET;
570 	sin->sin_len = sizeof(*sin);
571 
572 	s = splnet();
573 	inp = sotoinpcb(so);
574 	if (!inp) {
575 		splx(s);
576 		free(sin, M_SONAME);
577 		return EINVAL;
578 	}
579 	sin->sin_port = inp->inp_lport;
580 	sin->sin_addr = inp->inp_laddr;
581 	splx(s);
582 
583 	*nam = (struct sockaddr *)sin;
584 	return 0;
585 }
586 
587 int
588 in_setpeeraddr(so, nam)
589 	struct socket *so;
590 	struct sockaddr **nam;
591 {
592 	int s;
593 	struct inpcb *inp;
594 	register struct sockaddr_in *sin;
595 
596 	/*
597 	 * Do the malloc first in case it blocks.
598 	 */
599 	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK);
600 	bzero((caddr_t)sin, sizeof (*sin));
601 	sin->sin_family = AF_INET;
602 	sin->sin_len = sizeof(*sin);
603 
604 	s = splnet();
605 	inp = sotoinpcb(so);
606 	if (!inp) {
607 		splx(s);
608 		free(sin, M_SONAME);
609 		return EINVAL;
610 	}
611 	sin->sin_port = inp->inp_fport;
612 	sin->sin_addr = inp->inp_faddr;
613 	splx(s);
614 
615 	*nam = (struct sockaddr *)sin;
616 	return 0;
617 }
618 
619 /*
620  * Pass some notification to all connections of a protocol
621  * associated with address dst.  The local address and/or port numbers
622  * may be specified to limit the search.  The "usual action" will be
623  * taken, depending on the ctlinput cmd.  The caller must filter any
624  * cmds that are uninteresting (e.g., no error in the map).
625  * Call the protocol specific routine (if any) to report
626  * any errors for each matching socket.
627  */
628 void
629 in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify)
630 	struct inpcbhead *head;
631 	struct sockaddr *dst;
632 	u_int fport_arg, lport_arg;
633 	struct in_addr laddr;
634 	int cmd;
635 	void (*notify) __P((struct inpcb *, int));
636 {
637 	register struct inpcb *inp, *oinp;
638 	struct in_addr faddr;
639 	u_short fport = fport_arg, lport = lport_arg;
640 	int errno, s;
641 
642 	if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET)
643 		return;
644 	faddr = ((struct sockaddr_in *)dst)->sin_addr;
645 	if (faddr.s_addr == INADDR_ANY)
646 		return;
647 
648 	/*
649 	 * Redirects go to all references to the destination,
650 	 * and use in_rtchange to invalidate the route cache.
651 	 * Dead host indications: notify all references to the destination.
652 	 * Otherwise, if we have knowledge of the local port and address,
653 	 * deliver only to that socket.
654 	 */
655 	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
656 		fport = 0;
657 		lport = 0;
658 		laddr.s_addr = 0;
659 		if (cmd != PRC_HOSTDEAD)
660 			notify = in_rtchange;
661 	}
662 	errno = inetctlerrmap[cmd];
663 	s = splnet();
664 	for (inp = head->lh_first; inp != NULL;) {
665 #ifdef INET6
666 		if ((inp->inp_vflag & INP_IPV4) == 0) {
667 			inp = LIST_NEXT(inp, inp_list);
668 			continue;
669 		}
670 #endif
671 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
672 		    inp->inp_socket == 0 ||
673 		    (lport && inp->inp_lport != lport) ||
674 		    (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) ||
675 		    (fport && inp->inp_fport != fport)) {
676 			inp = inp->inp_list.le_next;
677 			continue;
678 		}
679 		oinp = inp;
680 		inp = inp->inp_list.le_next;
681 		if (notify)
682 			(*notify)(oinp, errno);
683 	}
684 	splx(s);
685 }
686 
687 /*
688  * Check for alternatives when higher level complains
689  * about service problems.  For now, invalidate cached
690  * routing information.  If the route was created dynamically
691  * (by a redirect), time to try a default gateway again.
692  */
693 void
694 in_losing(inp)
695 	struct inpcb *inp;
696 {
697 	register struct rtentry *rt;
698 	struct rt_addrinfo info;
699 
700 	if ((rt = inp->inp_route.ro_rt)) {
701 		inp->inp_route.ro_rt = 0;
702 		bzero((caddr_t)&info, sizeof(info));
703 		info.rti_info[RTAX_DST] =
704 			(struct sockaddr *)&inp->inp_route.ro_dst;
705 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
706 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
707 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
708 		if (rt->rt_flags & RTF_DYNAMIC)
709 			(void) rtrequest(RTM_DELETE, rt_key(rt),
710 				rt->rt_gateway, rt_mask(rt), rt->rt_flags,
711 				(struct rtentry **)0);
712 		else
713 		/*
714 		 * A new route can be allocated
715 		 * the next time output is attempted.
716 		 */
717 			rtfree(rt);
718 	}
719 }
720 
721 /*
722  * After a routing change, flush old routing
723  * and allocate a (hopefully) better one.
724  */
725 static void
726 in_rtchange(inp, errno)
727 	register struct inpcb *inp;
728 	int errno;
729 {
730 	if (inp->inp_route.ro_rt) {
731 		rtfree(inp->inp_route.ro_rt);
732 		inp->inp_route.ro_rt = 0;
733 		/*
734 		 * A new route can be allocated the next time
735 		 * output is attempted.
736 		 */
737 	}
738 }
739 
740 /*
741  * Lookup a PCB based on the local address and port.
742  */
743 struct inpcb *
744 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
745 	struct inpcbinfo *pcbinfo;
746 	struct in_addr laddr;
747 	u_int lport_arg;
748 	int wild_okay;
749 {
750 	register struct inpcb *inp;
751 	int matchwild = 3, wildcard;
752 	u_short lport = lport_arg;
753 
754 	if (!wild_okay) {
755 		struct inpcbhead *head;
756 		/*
757 		 * Look for an unconnected (wildcard foreign addr) PCB that
758 		 * matches the local address and port we're looking for.
759 		 */
760 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
761 		for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
762 #ifdef INET6
763 			if ((inp->inp_vflag & INP_IPV4) == 0)
764 				continue;
765 #endif
766 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
767 			    inp->inp_laddr.s_addr == laddr.s_addr &&
768 			    inp->inp_lport == lport) {
769 				/*
770 				 * Found.
771 				 */
772 				return (inp);
773 			}
774 		}
775 		/*
776 		 * Not found.
777 		 */
778 		return (NULL);
779 	} else {
780 		struct inpcbporthead *porthash;
781 		struct inpcbport *phd;
782 		struct inpcb *match = NULL;
783 		/*
784 		 * Best fit PCB lookup.
785 		 *
786 		 * First see if this local port is in use by looking on the
787 		 * port hash list.
788 		 */
789 		porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
790 		    pcbinfo->porthashmask)];
791 		for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) {
792 			if (phd->phd_port == lport)
793 				break;
794 		}
795 		if (phd != NULL) {
796 			/*
797 			 * Port is in use by one or more PCBs. Look for best
798 			 * fit.
799 			 */
800 			for (inp = phd->phd_pcblist.lh_first; inp != NULL;
801 			    inp = inp->inp_portlist.le_next) {
802 				wildcard = 0;
803 #ifdef INET6
804 				if ((inp->inp_vflag & INP_IPV4) == 0)
805 					continue;
806 #endif
807 				if (inp->inp_faddr.s_addr != INADDR_ANY)
808 					wildcard++;
809 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
810 					if (laddr.s_addr == INADDR_ANY)
811 						wildcard++;
812 					else if (inp->inp_laddr.s_addr != laddr.s_addr)
813 						continue;
814 				} else {
815 					if (laddr.s_addr != INADDR_ANY)
816 						wildcard++;
817 				}
818 				if (wildcard < matchwild) {
819 					match = inp;
820 					matchwild = wildcard;
821 					if (matchwild == 0) {
822 						break;
823 					}
824 				}
825 			}
826 		}
827 		return (match);
828 	}
829 }
830 
831 /*
832  * Lookup PCB in hash list.
833  */
834 struct inpcb *
835 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard,
836 		  ifp)
837 	struct inpcbinfo *pcbinfo;
838 	struct in_addr faddr, laddr;
839 	u_int fport_arg, lport_arg;
840 	int wildcard;
841 	struct ifnet *ifp;
842 {
843 	struct inpcbhead *head;
844 	register struct inpcb *inp;
845 	u_short fport = fport_arg, lport = lport_arg;
846 
847 	/*
848 	 * First look for an exact match.
849 	 */
850 	head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
851 	for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
852 #ifdef INET6
853 		if ((inp->inp_vflag & INP_IPV4) == 0)
854 			continue;
855 #endif
856 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
857 		    inp->inp_laddr.s_addr == laddr.s_addr &&
858 		    inp->inp_fport == fport &&
859 		    inp->inp_lport == lport) {
860 			/*
861 			 * Found.
862 			 */
863 			return (inp);
864 		}
865 	}
866 	if (wildcard) {
867 		struct inpcb *local_wild = NULL;
868 #if defined(INET6)
869 		struct inpcb *local_wild_mapped = NULL;
870 #endif /* defined(INET6) */
871 
872 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
873 		for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
874 #ifdef INET6
875 			if ((inp->inp_vflag & INP_IPV4) == 0)
876 				continue;
877 #endif
878 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
879 			    inp->inp_lport == lport) {
880 #if defined(NFAITH) && NFAITH > 0
881 				if (ifp && ifp->if_type == IFT_FAITH &&
882 				    (inp->inp_flags & INP_FAITH) == 0)
883 					continue;
884 #endif
885 				if (inp->inp_laddr.s_addr == laddr.s_addr)
886 					return (inp);
887 				else if (inp->inp_laddr.s_addr == INADDR_ANY) {
888 #if defined(INET6)
889 					if (INP_CHECK_SOCKAF(inp->inp_socket,
890 							     AF_INET6))
891 						local_wild_mapped = inp;
892 					else
893 #endif /* defined(INET6) */
894 					local_wild = inp;
895 				}
896 			}
897 		}
898 #if defined(INET6)
899 		if (local_wild == NULL)
900 			return (local_wild_mapped);
901 #endif /* defined(INET6) */
902 		return (local_wild);
903 	}
904 
905 	/*
906 	 * Not found.
907 	 */
908 	return (NULL);
909 }
910 
911 /*
912  * Insert PCB onto various hash lists.
913  */
914 int
915 in_pcbinshash(inp)
916 	struct inpcb *inp;
917 {
918 	struct inpcbhead *pcbhash;
919 	struct inpcbporthead *pcbporthash;
920 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
921 	struct inpcbport *phd;
922 	u_int32_t hashkey_faddr;
923 
924 #ifdef INET6
925 	if (inp->inp_vflag & INP_IPV6)
926 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
927 	else
928 #endif /* INET6 */
929 	hashkey_faddr = inp->inp_faddr.s_addr;
930 
931 	pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
932 		 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
933 
934 	pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
935 	    pcbinfo->porthashmask)];
936 
937 	/*
938 	 * Go through port list and look for a head for this lport.
939 	 */
940 	for (phd = pcbporthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) {
941 		if (phd->phd_port == inp->inp_lport)
942 			break;
943 	}
944 	/*
945 	 * If none exists, malloc one and tack it on.
946 	 */
947 	if (phd == NULL) {
948 		MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT);
949 		if (phd == NULL) {
950 			return (ENOBUFS); /* XXX */
951 		}
952 		phd->phd_port = inp->inp_lport;
953 		LIST_INIT(&phd->phd_pcblist);
954 		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
955 	}
956 	inp->inp_phd = phd;
957 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
958 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
959 	return (0);
960 }
961 
962 /*
963  * Move PCB to the proper hash bucket when { faddr, fport } have  been
964  * changed. NOTE: This does not handle the case of the lport changing (the
965  * hashed port list would have to be updated as well), so the lport must
966  * not change after in_pcbinshash() has been called.
967  */
968 void
969 in_pcbrehash(inp)
970 	struct inpcb *inp;
971 {
972 	struct inpcbhead *head;
973 	u_int32_t hashkey_faddr;
974 
975 #ifdef INET6
976 	if (inp->inp_vflag & INP_IPV6)
977 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
978 	else
979 #endif /* INET6 */
980 	hashkey_faddr = inp->inp_faddr.s_addr;
981 
982 	head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
983 		inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)];
984 
985 	LIST_REMOVE(inp, inp_hash);
986 	LIST_INSERT_HEAD(head, inp, inp_hash);
987 }
988 
989 /*
990  * Remove PCB from various lists.
991  */
992 void
993 in_pcbremlists(inp)
994 	struct inpcb *inp;
995 {
996 	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
997 	if (inp->inp_lport) {
998 		struct inpcbport *phd = inp->inp_phd;
999 
1000 		LIST_REMOVE(inp, inp_hash);
1001 		LIST_REMOVE(inp, inp_portlist);
1002 		if (phd->phd_pcblist.lh_first == NULL) {
1003 			LIST_REMOVE(phd, phd_hash);
1004 			free(phd, M_PCB);
1005 		}
1006 	}
1007 	LIST_REMOVE(inp, inp_list);
1008 	inp->inp_pcbinfo->ipi_count--;
1009 }
1010 
1011 int
1012 prison_xinpcb(struct proc *p, struct inpcb *inp)
1013 {
1014 	if (!p->p_prison)
1015 		return (0);
1016 	if (ntohl(inp->inp_laddr.s_addr) == p->p_prison->pr_ip)
1017 		return (0);
1018 	return (1);
1019 }
1020