xref: /freebsd/sys/netinet/in_pcb.c (revision ce4946daa5ce852d28008dac492029500ab2ee95)
1 /*
2  * Copyright (c) 1982, 1986, 1991, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_ipsec.h"
38 #include "opt_inet6.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/domain.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/proc.h>
49 #include <sys/jail.h>
50 #include <sys/kernel.h>
51 #include <sys/sysctl.h>
52 
53 #include <machine/limits.h>
54 
55 #include <vm/vm_zone.h>
56 
57 #include <net/if.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 
61 #include <netinet/in.h>
62 #include <netinet/in_pcb.h>
63 #include <netinet/in_var.h>
64 #include <netinet/ip_var.h>
65 #ifdef INET6
66 #include <netinet/ip6.h>
67 #include <netinet6/ip6_var.h>
68 #endif /* INET6 */
69 
70 #include "faith.h"
71 
72 #ifdef IPSEC
73 #include <netinet6/ipsec.h>
74 #include <netkey/key.h>
75 #endif /* IPSEC */
76 
77 struct	in_addr zeroin_addr;
78 
79 /*
80  * These configure the range of local port addresses assigned to
81  * "unspecified" outgoing connections/packets/whatever.
82  */
83 int	ipport_lowfirstauto  = IPPORT_RESERVED - 1;	/* 1023 */
84 int	ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
85 int	ipport_firstauto = IPPORT_RESERVED;		/* 1024 */
86 int	ipport_lastauto  = IPPORT_USERRESERVED;		/* 5000 */
87 int	ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
88 int	ipport_hilastauto  = IPPORT_HILASTAUTO;		/* 65535 */
89 
90 #define RANGECHK(var, min, max) \
91 	if ((var) < (min)) { (var) = (min); } \
92 	else if ((var) > (max)) { (var) = (max); }
93 
94 static int
95 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
96 {
97 	int error = sysctl_handle_int(oidp,
98 		oidp->oid_arg1, oidp->oid_arg2, req);
99 	if (!error) {
100 		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
101 		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
102 		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
103 		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
104 		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
105 		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
106 	}
107 	return error;
108 }
109 
110 #undef RANGECHK
111 
112 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
113 
114 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
115 	   &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
116 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
117 	   &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
118 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
119 	   &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
120 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
121 	   &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
122 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
123 	   &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
124 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
125 	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
126 
127 /*
128  * in_pcb.c: manage the Protocol Control Blocks.
129  *
130  * NOTE: It is assumed that most of these functions will be called at
131  * splnet(). XXX - There are, unfortunately, a few exceptions to this
132  * rule that should be fixed.
133  */
134 
135 /*
136  * Allocate a PCB and associate it with the socket.
137  */
138 int
139 in_pcballoc(so, pcbinfo, p)
140 	struct socket *so;
141 	struct inpcbinfo *pcbinfo;
142 	struct proc *p;
143 {
144 	register struct inpcb *inp;
145 
146 	inp = zalloc(pcbinfo->ipi_zone);
147 	if (inp == NULL)
148 		return (ENOBUFS);
149 	bzero((caddr_t)inp, sizeof(*inp));
150 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
151 	inp->inp_pcbinfo = pcbinfo;
152 	inp->inp_socket = so;
153 #if defined(INET6)
154 	if (ip6_mapped_addr_on)
155 		inp->inp_flags &= ~IN6P_BINDV6ONLY;
156 	else
157 		inp->inp_flags |= IN6P_BINDV6ONLY;
158 #endif
159 	LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
160 	pcbinfo->ipi_count++;
161 	so->so_pcb = (caddr_t)inp;
162 	return (0);
163 }
164 
165 int
166 in_pcbbind(inp, nam, p)
167 	register struct inpcb *inp;
168 	struct sockaddr *nam;
169 	struct proc *p;
170 {
171 	register struct socket *so = inp->inp_socket;
172 	unsigned short *lastport;
173 	struct sockaddr_in *sin;
174 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
175 	u_short lport = 0;
176 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
177 	int error, prison = 0;
178 
179 	if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
180 		return (EADDRNOTAVAIL);
181 	if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
182 		return (EINVAL);
183 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
184 		wild = 1;
185 	if (nam) {
186 		sin = (struct sockaddr_in *)nam;
187 		if (nam->sa_len != sizeof (*sin))
188 			return (EINVAL);
189 #ifdef notdef
190 		/*
191 		 * We should check the family, but old programs
192 		 * incorrectly fail to initialize it.
193 		 */
194 		if (sin->sin_family != AF_INET)
195 			return (EAFNOSUPPORT);
196 #endif
197 		if (sin->sin_addr.s_addr != INADDR_ANY)
198 			if (prison_ip(p->p_ucred, 0, &sin->sin_addr.s_addr))
199 				return(EINVAL);
200 		lport = sin->sin_port;
201 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
202 			/*
203 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
204 			 * allow complete duplication of binding if
205 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
206 			 * and a multicast address is bound on both
207 			 * new and duplicated sockets.
208 			 */
209 			if (so->so_options & SO_REUSEADDR)
210 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
211 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
212 			sin->sin_port = 0;		/* yech... */
213 			if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
214 				return (EADDRNOTAVAIL);
215 		}
216 		if (lport) {
217 			struct inpcb *t;
218 			/* GROSS */
219 			if (ntohs(lport) < IPPORT_RESERVED && p &&
220 			    suser_xxx(0, p, PRISON_ROOT))
221 				return (EACCES);
222 			if (p && jailed(p->p_ucred))
223 				prison = 1;
224 			if (so->so_cred->cr_uid != 0 &&
225 			    !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
226 				t = in_pcblookup_local(inp->inp_pcbinfo,
227 				    sin->sin_addr, lport,
228 				    prison ? 0 :  INPLOOKUP_WILDCARD);
229 				if (t &&
230 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
231 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
232 				     (t->inp_socket->so_options &
233 					 SO_REUSEPORT) == 0) &&
234 				    (so->so_cred->cr_uid !=
235 				     t->inp_socket->so_cred->cr_uid)) {
236 #if defined(INET6)
237 					if ((inp->inp_flags &
238 					     IN6P_BINDV6ONLY) != 0 ||
239 					    ntohl(sin->sin_addr.s_addr) !=
240 					    INADDR_ANY ||
241 					    ntohl(t->inp_laddr.s_addr) !=
242 					    INADDR_ANY ||
243 					    INP_SOCKAF(so) ==
244 					    INP_SOCKAF(t->inp_socket))
245 #endif /* defined(INET6) */
246 					return (EADDRINUSE);
247 				}
248 			}
249 			if (prison &&
250 			    prison_ip(p->p_ucred, 0, &sin->sin_addr.s_addr))
251 				return (EADDRNOTAVAIL);
252 			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
253 			    lport, prison ? 0 : wild);
254 			if (t &&
255 			    (reuseport & t->inp_socket->so_options) == 0) {
256 #if defined(INET6)
257 				if ((inp->inp_flags & IN6P_BINDV6ONLY) != 0 ||
258 				    ntohl(sin->sin_addr.s_addr) !=
259 				    INADDR_ANY ||
260 				    ntohl(t->inp_laddr.s_addr) !=
261 				    INADDR_ANY ||
262 				    INP_SOCKAF(so) ==
263 				    INP_SOCKAF(t->inp_socket))
264 #endif /* defined(INET6) */
265 				return (EADDRINUSE);
266 			}
267 		}
268 		inp->inp_laddr = sin->sin_addr;
269 	}
270 	if (lport == 0) {
271 		ushort first, last;
272 		int count;
273 
274 		if (inp->inp_laddr.s_addr != INADDR_ANY)
275 			if (prison_ip(p->p_ucred, 0, &inp->inp_laddr.s_addr )) {
276 				inp->inp_laddr.s_addr = INADDR_ANY;
277 				return (EINVAL);
278 			}
279 		inp->inp_flags |= INP_ANONPORT;
280 
281 		if (inp->inp_flags & INP_HIGHPORT) {
282 			first = ipport_hifirstauto;	/* sysctl */
283 			last  = ipport_hilastauto;
284 			lastport = &pcbinfo->lasthi;
285 		} else if (inp->inp_flags & INP_LOWPORT) {
286 			if (p && (error = suser_xxx(0, p, PRISON_ROOT))) {
287 				inp->inp_laddr.s_addr = INADDR_ANY;
288 				return error;
289 			}
290 			first = ipport_lowfirstauto;	/* 1023 */
291 			last  = ipport_lowlastauto;	/* 600 */
292 			lastport = &pcbinfo->lastlow;
293 		} else {
294 			first = ipport_firstauto;	/* sysctl */
295 			last  = ipport_lastauto;
296 			lastport = &pcbinfo->lastport;
297 		}
298 		/*
299 		 * Simple check to ensure all ports are not used up causing
300 		 * a deadlock here.
301 		 *
302 		 * We split the two cases (up and down) so that the direction
303 		 * is not being tested on each round of the loop.
304 		 */
305 		if (first > last) {
306 			/*
307 			 * counting down
308 			 */
309 			count = first - last;
310 
311 			do {
312 				if (count-- < 0) {	/* completely used? */
313 					inp->inp_laddr.s_addr = INADDR_ANY;
314 					return (EADDRNOTAVAIL);
315 				}
316 				--*lastport;
317 				if (*lastport > first || *lastport < last)
318 					*lastport = first;
319 				lport = htons(*lastport);
320 			} while (in_pcblookup_local(pcbinfo,
321 				 inp->inp_laddr, lport, wild));
322 		} else {
323 			/*
324 			 * counting up
325 			 */
326 			count = last - first;
327 
328 			do {
329 				if (count-- < 0) {	/* completely used? */
330 					/*
331 					 * Undo any address bind that may have
332 					 * occurred above.
333 					 */
334 					inp->inp_laddr.s_addr = INADDR_ANY;
335 					return (EADDRNOTAVAIL);
336 				}
337 				++*lastport;
338 				if (*lastport < first || *lastport > last)
339 					*lastport = first;
340 				lport = htons(*lastport);
341 			} while (in_pcblookup_local(pcbinfo,
342 				 inp->inp_laddr, lport, wild));
343 		}
344 	}
345 	inp->inp_lport = lport;
346 	if (prison_ip(p->p_ucred, 0, &inp->inp_laddr.s_addr)) {
347 		inp->inp_laddr.s_addr = INADDR_ANY;
348 		inp->inp_lport = 0;
349 		return(EINVAL);
350 	}
351 	if (in_pcbinshash(inp) != 0) {
352 		inp->inp_laddr.s_addr = INADDR_ANY;
353 		inp->inp_lport = 0;
354 		return (EAGAIN);
355 	}
356 	return (0);
357 }
358 
359 /*
360  *   Transform old in_pcbconnect() into an inner subroutine for new
361  *   in_pcbconnect(): Do some validity-checking on the remote
362  *   address (in mbuf 'nam') and then determine local host address
363  *   (i.e., which interface) to use to access that remote host.
364  *
365  *   This preserves definition of in_pcbconnect(), while supporting a
366  *   slightly different version for T/TCP.  (This is more than
367  *   a bit of a kludge, but cleaning up the internal interfaces would
368  *   have forced minor changes in every protocol).
369  */
370 
371 int
372 in_pcbladdr(inp, nam, plocal_sin)
373 	register struct inpcb *inp;
374 	struct sockaddr *nam;
375 	struct sockaddr_in **plocal_sin;
376 {
377 	struct in_ifaddr *ia;
378 	register struct sockaddr_in *sin = (struct sockaddr_in *)nam;
379 
380 	if (nam->sa_len != sizeof (*sin))
381 		return (EINVAL);
382 	if (sin->sin_family != AF_INET)
383 		return (EAFNOSUPPORT);
384 	if (sin->sin_port == 0)
385 		return (EADDRNOTAVAIL);
386 	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
387 		/*
388 		 * If the destination address is INADDR_ANY,
389 		 * use the primary local address.
390 		 * If the supplied address is INADDR_BROADCAST,
391 		 * and the primary interface supports broadcast,
392 		 * choose the broadcast address for that interface.
393 		 */
394 #define	satosin(sa)	((struct sockaddr_in *)(sa))
395 #define sintosa(sin)	((struct sockaddr *)(sin))
396 #define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
397 		if (sin->sin_addr.s_addr == INADDR_ANY)
398 		    sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
399 		else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
400 		  (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST))
401 		    sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr;
402 	}
403 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
404 		register struct route *ro;
405 
406 		ia = (struct in_ifaddr *)0;
407 		/*
408 		 * If route is known or can be allocated now,
409 		 * our src addr is taken from the i/f, else punt.
410 		 */
411 		ro = &inp->inp_route;
412 		if (ro->ro_rt &&
413 		    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
414 			sin->sin_addr.s_addr ||
415 		    inp->inp_socket->so_options & SO_DONTROUTE)) {
416 			RTFREE(ro->ro_rt);
417 			ro->ro_rt = (struct rtentry *)0;
418 		}
419 		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
420 		    (ro->ro_rt == (struct rtentry *)0 ||
421 		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
422 			/* No route yet, so try to acquire one */
423 			ro->ro_dst.sa_family = AF_INET;
424 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
425 			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
426 				sin->sin_addr;
427 			rtalloc(ro);
428 		}
429 		/*
430 		 * If we found a route, use the address
431 		 * corresponding to the outgoing interface
432 		 * unless it is the loopback (in case a route
433 		 * to our address on another net goes to loopback).
434 		 */
435 		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
436 			ia = ifatoia(ro->ro_rt->rt_ifa);
437 		if (ia == 0) {
438 			u_short fport = sin->sin_port;
439 
440 			sin->sin_port = 0;
441 			ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
442 			if (ia == 0)
443 				ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
444 			sin->sin_port = fport;
445 			if (ia == 0)
446 				ia = TAILQ_FIRST(&in_ifaddrhead);
447 			if (ia == 0)
448 				return (EADDRNOTAVAIL);
449 		}
450 		/*
451 		 * If the destination address is multicast and an outgoing
452 		 * interface has been set as a multicast option, use the
453 		 * address of that interface as our source address.
454 		 */
455 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
456 		    inp->inp_moptions != NULL) {
457 			struct ip_moptions *imo;
458 			struct ifnet *ifp;
459 
460 			imo = inp->inp_moptions;
461 			if (imo->imo_multicast_ifp != NULL) {
462 				ifp = imo->imo_multicast_ifp;
463 				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
464 					if (ia->ia_ifp == ifp)
465 						break;
466 				if (ia == 0)
467 					return (EADDRNOTAVAIL);
468 			}
469 		}
470 	/*
471 	 * Don't do pcblookup call here; return interface in plocal_sin
472 	 * and exit to caller, that will do the lookup.
473 	 */
474 		*plocal_sin = &ia->ia_addr;
475 
476 	}
477 	return(0);
478 }
479 
480 /*
481  * Outer subroutine:
482  * Connect from a socket to a specified address.
483  * Both address and port must be specified in argument sin.
484  * If don't have a local address for this socket yet,
485  * then pick one.
486  */
487 int
488 in_pcbconnect(inp, nam, p)
489 	register struct inpcb *inp;
490 	struct sockaddr *nam;
491 	struct proc *p;
492 {
493 	struct sockaddr_in *ifaddr;
494 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
495 	struct sockaddr_in sa;
496 	struct ucred *cred;
497 	int error;
498 
499 	cred = inp->inp_socket->so_cred;
500 	if (inp->inp_laddr.s_addr == INADDR_ANY && jailed(cred)) {
501 		bzero(&sa, sizeof (sa));
502 		sa.sin_addr.s_addr = htonl(cred->cr_prison->pr_ip);
503 		sa.sin_len=sizeof (sa);
504 		sa.sin_family = AF_INET;
505 		error = in_pcbbind(inp, (struct sockaddr *)&sa, p);
506 		if (error)
507 		    return (error);
508 	}
509 	/*
510 	 *   Call inner routine, to assign local interface address.
511 	 */
512 	if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
513 		return(error);
514 
515 	if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
516 	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
517 	    inp->inp_lport, 0, NULL) != NULL) {
518 		return (EADDRINUSE);
519 	}
520 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
521 		if (inp->inp_lport == 0) {
522 			error = in_pcbbind(inp, (struct sockaddr *)0, p);
523 			if (error)
524 				return (error);
525 		}
526 		inp->inp_laddr = ifaddr->sin_addr;
527 	}
528 	inp->inp_faddr = sin->sin_addr;
529 	inp->inp_fport = sin->sin_port;
530 	in_pcbrehash(inp);
531 	return (0);
532 }
533 
534 void
535 in_pcbdisconnect(inp)
536 	struct inpcb *inp;
537 {
538 
539 	inp->inp_faddr.s_addr = INADDR_ANY;
540 	inp->inp_fport = 0;
541 	in_pcbrehash(inp);
542 	if (inp->inp_socket->so_state & SS_NOFDREF)
543 		in_pcbdetach(inp);
544 }
545 
546 void
547 in_pcbdetach(inp)
548 	struct inpcb *inp;
549 {
550 	struct socket *so = inp->inp_socket;
551 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
552 	struct rtentry *rt  = inp->inp_route.ro_rt;
553 
554 #ifdef IPSEC
555 	ipsec4_delete_pcbpolicy(inp);
556 #endif /*IPSEC*/
557 	inp->inp_gencnt = ++ipi->ipi_gencnt;
558 	in_pcbremlists(inp);
559 	so->so_pcb = 0;
560 	sofree(so);
561 	if (inp->inp_options)
562 		(void)m_free(inp->inp_options);
563 	if (rt) {
564 		/*
565 		 * route deletion requires reference count to be <= zero
566 		 */
567 		if ((rt->rt_flags & RTF_DELCLONE) &&
568 		    (rt->rt_flags & RTF_WASCLONED) &&
569 		    (rt->rt_refcnt <= 1)) {
570 			rt->rt_refcnt--;
571 			rt->rt_flags &= ~RTF_UP;
572 			rtrequest(RTM_DELETE, rt_key(rt),
573 				  rt->rt_gateway, rt_mask(rt),
574 				  rt->rt_flags, (struct rtentry **)0);
575 		}
576 		else
577 			rtfree(rt);
578 	}
579 	ip_freemoptions(inp->inp_moptions);
580 	inp->inp_vflag = 0;
581 	zfree(ipi->ipi_zone, inp);
582 }
583 
584 /*
585  * The calling convention of in_setsockaddr() and in_setpeeraddr() was
586  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
587  * in struct pr_usrreqs, so that protocols can just reference then directly
588  * without the need for a wrapper function.  The socket must have a valid
589  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
590  * except through a kernel programming error, so it is acceptable to panic
591  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
592  * because there actually /is/ a programming error somewhere... XXX)
593  */
594 int
595 in_setsockaddr(so, nam)
596 	struct socket *so;
597 	struct sockaddr **nam;
598 {
599 	int s;
600 	register struct inpcb *inp;
601 	register struct sockaddr_in *sin;
602 
603 	/*
604 	 * Do the malloc first in case it blocks.
605 	 */
606 	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
607 		M_WAITOK | M_ZERO);
608 	sin->sin_family = AF_INET;
609 	sin->sin_len = sizeof(*sin);
610 
611 	s = splnet();
612 	inp = sotoinpcb(so);
613 	if (!inp) {
614 		splx(s);
615 		free(sin, M_SONAME);
616 		return ECONNRESET;
617 	}
618 	sin->sin_port = inp->inp_lport;
619 	sin->sin_addr = inp->inp_laddr;
620 	splx(s);
621 
622 	*nam = (struct sockaddr *)sin;
623 	return 0;
624 }
625 
626 int
627 in_setpeeraddr(so, nam)
628 	struct socket *so;
629 	struct sockaddr **nam;
630 {
631 	int s;
632 	struct inpcb *inp;
633 	register struct sockaddr_in *sin;
634 
635 	/*
636 	 * Do the malloc first in case it blocks.
637 	 */
638 	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
639 		M_WAITOK | M_ZERO);
640 	sin->sin_family = AF_INET;
641 	sin->sin_len = sizeof(*sin);
642 
643 	s = splnet();
644 	inp = sotoinpcb(so);
645 	if (!inp) {
646 		splx(s);
647 		free(sin, M_SONAME);
648 		return ECONNRESET;
649 	}
650 	sin->sin_port = inp->inp_fport;
651 	sin->sin_addr = inp->inp_faddr;
652 	splx(s);
653 
654 	*nam = (struct sockaddr *)sin;
655 	return 0;
656 }
657 
658 void
659 in_pcbnotifyall(head, faddr, errno, notify)
660 	struct inpcbhead *head;
661 	struct in_addr faddr;
662 	int errno;
663 	void (*notify) __P((struct inpcb *, int));
664 {
665 	struct inpcb *inp, *ninp;
666 	int s;
667 
668 	s = splnet();
669 	for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
670 		ninp = LIST_NEXT(inp, inp_list);
671 #ifdef INET6
672 		if ((inp->inp_vflag & INP_IPV4) == 0)
673 			continue;
674 #endif
675 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
676 		    inp->inp_socket == NULL)
677 				continue;
678 		(*notify)(inp, errno);
679 	}
680 	splx(s);
681 }
682 
683 /*
684  * Check for alternatives when higher level complains
685  * about service problems.  For now, invalidate cached
686  * routing information.  If the route was created dynamically
687  * (by a redirect), time to try a default gateway again.
688  */
689 void
690 in_losing(inp)
691 	struct inpcb *inp;
692 {
693 	register struct rtentry *rt;
694 	struct rt_addrinfo info;
695 
696 	if ((rt = inp->inp_route.ro_rt)) {
697 		inp->inp_route.ro_rt = 0;
698 		bzero((caddr_t)&info, sizeof(info));
699 		info.rti_info[RTAX_DST] =
700 			(struct sockaddr *)&inp->inp_route.ro_dst;
701 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
702 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
703 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
704 		if (rt->rt_flags & RTF_DYNAMIC)
705 			(void) rtrequest(RTM_DELETE, rt_key(rt),
706 				rt->rt_gateway, rt_mask(rt), rt->rt_flags,
707 				(struct rtentry **)0);
708 		else
709 		/*
710 		 * A new route can be allocated
711 		 * the next time output is attempted.
712 		 */
713 			rtfree(rt);
714 	}
715 }
716 
717 /*
718  * After a routing change, flush old routing
719  * and allocate a (hopefully) better one.
720  */
721 void
722 in_rtchange(inp, errno)
723 	register struct inpcb *inp;
724 	int errno;
725 {
726 	if (inp->inp_route.ro_rt) {
727 		rtfree(inp->inp_route.ro_rt);
728 		inp->inp_route.ro_rt = 0;
729 		/*
730 		 * A new route can be allocated the next time
731 		 * output is attempted.
732 		 */
733 	}
734 }
735 
736 /*
737  * Lookup a PCB based on the local address and port.
738  */
739 struct inpcb *
740 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
741 	struct inpcbinfo *pcbinfo;
742 	struct in_addr laddr;
743 	u_int lport_arg;
744 	int wild_okay;
745 {
746 	register struct inpcb *inp;
747 	int matchwild = 3, wildcard;
748 	u_short lport = lport_arg;
749 
750 	if (!wild_okay) {
751 		struct inpcbhead *head;
752 		/*
753 		 * Look for an unconnected (wildcard foreign addr) PCB that
754 		 * matches the local address and port we're looking for.
755 		 */
756 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
757 		LIST_FOREACH(inp, head, inp_hash) {
758 #ifdef INET6
759 			if ((inp->inp_vflag & INP_IPV4) == 0)
760 				continue;
761 #endif
762 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
763 			    inp->inp_laddr.s_addr == laddr.s_addr &&
764 			    inp->inp_lport == lport) {
765 				/*
766 				 * Found.
767 				 */
768 				return (inp);
769 			}
770 		}
771 		/*
772 		 * Not found.
773 		 */
774 		return (NULL);
775 	} else {
776 		struct inpcbporthead *porthash;
777 		struct inpcbport *phd;
778 		struct inpcb *match = NULL;
779 		/*
780 		 * Best fit PCB lookup.
781 		 *
782 		 * First see if this local port is in use by looking on the
783 		 * port hash list.
784 		 */
785 		porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
786 		    pcbinfo->porthashmask)];
787 		LIST_FOREACH(phd, porthash, phd_hash) {
788 			if (phd->phd_port == lport)
789 				break;
790 		}
791 		if (phd != NULL) {
792 			/*
793 			 * Port is in use by one or more PCBs. Look for best
794 			 * fit.
795 			 */
796 			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
797 				wildcard = 0;
798 #ifdef INET6
799 				if ((inp->inp_vflag & INP_IPV4) == 0)
800 					continue;
801 #endif
802 				if (inp->inp_faddr.s_addr != INADDR_ANY)
803 					wildcard++;
804 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
805 					if (laddr.s_addr == INADDR_ANY)
806 						wildcard++;
807 					else if (inp->inp_laddr.s_addr != laddr.s_addr)
808 						continue;
809 				} else {
810 					if (laddr.s_addr != INADDR_ANY)
811 						wildcard++;
812 				}
813 				if (wildcard < matchwild) {
814 					match = inp;
815 					matchwild = wildcard;
816 					if (matchwild == 0) {
817 						break;
818 					}
819 				}
820 			}
821 		}
822 		return (match);
823 	}
824 }
825 
826 /*
827  * Lookup PCB in hash list.
828  */
829 struct inpcb *
830 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard,
831 		  ifp)
832 	struct inpcbinfo *pcbinfo;
833 	struct in_addr faddr, laddr;
834 	u_int fport_arg, lport_arg;
835 	int wildcard;
836 	struct ifnet *ifp;
837 {
838 	struct inpcbhead *head;
839 	register struct inpcb *inp;
840 	u_short fport = fport_arg, lport = lport_arg;
841 
842 	/*
843 	 * First look for an exact match.
844 	 */
845 	head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
846 	LIST_FOREACH(inp, head, inp_hash) {
847 #ifdef INET6
848 		if ((inp->inp_vflag & INP_IPV4) == 0)
849 			continue;
850 #endif
851 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
852 		    inp->inp_laddr.s_addr == laddr.s_addr &&
853 		    inp->inp_fport == fport &&
854 		    inp->inp_lport == lport) {
855 			/*
856 			 * Found.
857 			 */
858 			return (inp);
859 		}
860 	}
861 	if (wildcard) {
862 		struct inpcb *local_wild = NULL;
863 #if defined(INET6)
864 		struct inpcb *local_wild_mapped = NULL;
865 #endif /* defined(INET6) */
866 
867 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
868 		LIST_FOREACH(inp, head, inp_hash) {
869 #ifdef INET6
870 			if ((inp->inp_vflag & INP_IPV4) == 0)
871 				continue;
872 #endif
873 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
874 			    inp->inp_lport == lport) {
875 #if defined(NFAITH) && NFAITH > 0
876 				if (ifp && ifp->if_type == IFT_FAITH &&
877 				    (inp->inp_flags & INP_FAITH) == 0)
878 					continue;
879 #endif
880 				if (inp->inp_laddr.s_addr == laddr.s_addr)
881 					return (inp);
882 				else if (inp->inp_laddr.s_addr == INADDR_ANY) {
883 #if defined(INET6)
884 					if (INP_CHECK_SOCKAF(inp->inp_socket,
885 							     AF_INET6))
886 						local_wild_mapped = inp;
887 					else
888 #endif /* defined(INET6) */
889 					local_wild = inp;
890 				}
891 			}
892 		}
893 #if defined(INET6)
894 		if (local_wild == NULL)
895 			return (local_wild_mapped);
896 #endif /* defined(INET6) */
897 		return (local_wild);
898 	}
899 
900 	/*
901 	 * Not found.
902 	 */
903 	return (NULL);
904 }
905 
906 /*
907  * Insert PCB onto various hash lists.
908  */
909 int
910 in_pcbinshash(inp)
911 	struct inpcb *inp;
912 {
913 	struct inpcbhead *pcbhash;
914 	struct inpcbporthead *pcbporthash;
915 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
916 	struct inpcbport *phd;
917 	u_int32_t hashkey_faddr;
918 
919 #ifdef INET6
920 	if (inp->inp_vflag & INP_IPV6)
921 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
922 	else
923 #endif /* INET6 */
924 	hashkey_faddr = inp->inp_faddr.s_addr;
925 
926 	pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
927 		 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
928 
929 	pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
930 	    pcbinfo->porthashmask)];
931 
932 	/*
933 	 * Go through port list and look for a head for this lport.
934 	 */
935 	LIST_FOREACH(phd, pcbporthash, phd_hash) {
936 		if (phd->phd_port == inp->inp_lport)
937 			break;
938 	}
939 	/*
940 	 * If none exists, malloc one and tack it on.
941 	 */
942 	if (phd == NULL) {
943 		MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT);
944 		if (phd == NULL) {
945 			return (ENOBUFS); /* XXX */
946 		}
947 		phd->phd_port = inp->inp_lport;
948 		LIST_INIT(&phd->phd_pcblist);
949 		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
950 	}
951 	inp->inp_phd = phd;
952 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
953 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
954 	return (0);
955 }
956 
957 /*
958  * Move PCB to the proper hash bucket when { faddr, fport } have  been
959  * changed. NOTE: This does not handle the case of the lport changing (the
960  * hashed port list would have to be updated as well), so the lport must
961  * not change after in_pcbinshash() has been called.
962  */
963 void
964 in_pcbrehash(inp)
965 	struct inpcb *inp;
966 {
967 	struct inpcbhead *head;
968 	u_int32_t hashkey_faddr;
969 
970 #ifdef INET6
971 	if (inp->inp_vflag & INP_IPV6)
972 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
973 	else
974 #endif /* INET6 */
975 	hashkey_faddr = inp->inp_faddr.s_addr;
976 
977 	head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
978 		inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)];
979 
980 	LIST_REMOVE(inp, inp_hash);
981 	LIST_INSERT_HEAD(head, inp, inp_hash);
982 }
983 
984 /*
985  * Remove PCB from various lists.
986  */
987 void
988 in_pcbremlists(inp)
989 	struct inpcb *inp;
990 {
991 	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
992 	if (inp->inp_lport) {
993 		struct inpcbport *phd = inp->inp_phd;
994 
995 		LIST_REMOVE(inp, inp_hash);
996 		LIST_REMOVE(inp, inp_portlist);
997 		if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
998 			LIST_REMOVE(phd, phd_hash);
999 			free(phd, M_PCB);
1000 		}
1001 	}
1002 	LIST_REMOVE(inp, inp_list);
1003 	inp->inp_pcbinfo->ipi_count--;
1004 }
1005 
1006 int
1007 prison_xinpcb(struct proc *p, struct inpcb *inp)
1008 {
1009 	if (!jailed(p->p_ucred))
1010 		return (0);
1011 	if (ntohl(inp->inp_laddr.s_addr) == p->p_ucred->cr_prison->pr_ip)
1012 		return (0);
1013 	return (1);
1014 }
1015