xref: /freebsd/sys/netinet/in_pcb.c (revision 1b6c2589164a3a7b2f62d4c28c2ffa1be860959e)
1 /*
2  * Copyright (c) 1982, 1986, 1991, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_ipsec.h"
38 #include "opt_inet6.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/domain.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/proc.h>
49 #include <sys/jail.h>
50 #include <sys/kernel.h>
51 #include <sys/sysctl.h>
52 
53 #include <machine/limits.h>
54 
55 #include <vm/vm_zone.h>
56 
57 #include <net/if.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 
61 #include <netinet/in.h>
62 #include <netinet/in_pcb.h>
63 #include <netinet/in_var.h>
64 #include <netinet/ip_var.h>
65 #ifdef INET6
66 #include <netinet/ip6.h>
67 #include <netinet6/ip6_var.h>
68 #endif /* INET6 */
69 
70 #ifdef IPSEC
71 #include <netinet6/ipsec.h>
72 #include <netkey/key.h>
73 #endif /* IPSEC */
74 
75 struct	in_addr zeroin_addr;
76 
77 /*
78  * These configure the range of local port addresses assigned to
79  * "unspecified" outgoing connections/packets/whatever.
80  */
81 int	ipport_lowfirstauto  = IPPORT_RESERVED - 1;	/* 1023 */
82 int	ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */
83 int	ipport_firstauto = IPPORT_RESERVED;		/* 1024 */
84 int	ipport_lastauto  = IPPORT_USERRESERVED;		/* 5000 */
85 int	ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
86 int	ipport_hilastauto  = IPPORT_HILASTAUTO;		/* 65535 */
87 
88 #define RANGECHK(var, min, max) \
89 	if ((var) < (min)) { (var) = (min); } \
90 	else if ((var) > (max)) { (var) = (max); }
91 
92 static int
93 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
94 {
95 	int error = sysctl_handle_int(oidp,
96 		oidp->oid_arg1, oidp->oid_arg2, req);
97 	if (!error) {
98 		RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
99 		RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
100 		RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
101 		RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
102 		RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
103 		RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
104 	}
105 	return error;
106 }
107 
108 #undef RANGECHK
109 
110 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
111 
112 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
113 	   &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
114 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
115 	   &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
116 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
117 	   &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
118 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
119 	   &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
120 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
121 	   &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
122 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
123 	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
124 
125 /*
126  * in_pcb.c: manage the Protocol Control Blocks.
127  *
128  * NOTE: It is assumed that most of these functions will be called at
129  * splnet(). XXX - There are, unfortunately, a few exceptions to this
130  * rule that should be fixed.
131  */
132 
133 /*
134  * Allocate a PCB and associate it with the socket.
135  */
136 int
137 in_pcballoc(so, pcbinfo, td)
138 	struct socket *so;
139 	struct inpcbinfo *pcbinfo;
140 	struct thread *td;
141 {
142 	register struct inpcb *inp;
143 #ifdef IPSEC
144 	int error;
145 #endif
146 
147 	inp = zalloc(pcbinfo->ipi_zone);
148 	if (inp == NULL)
149 		return (ENOBUFS);
150 	bzero((caddr_t)inp, sizeof(*inp));
151 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
152 	inp->inp_pcbinfo = pcbinfo;
153 	inp->inp_socket = so;
154 #ifdef IPSEC
155 	error = ipsec_init_policy(so, &inp->inp_sp);
156 	if (error != 0) {
157 		zfree(pcbinfo->ipi_zone, inp);
158 		return error;
159 	}
160 #endif /*IPSEC*/
161 #if defined(INET6)
162 	if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on)
163 		inp->inp_flags |= IN6P_IPV6_V6ONLY;
164 #endif
165 	LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
166 	pcbinfo->ipi_count++;
167 	so->so_pcb = (caddr_t)inp;
168 #ifdef INET6
169 	if (ip6_auto_flowlabel)
170 		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
171 #endif
172 	return (0);
173 }
174 
/*
 * Bind a local address and/or port to a PCB.
 *
 * inp	PCB to bind.  It must not already have a local port or a
 *	non-wildcard local address (EINVAL otherwise).
 * nam	Requested address as a struct sockaddr_in, or NULL.  With NULL,
 *	or with a zero sin_port, a port is chosen automatically from one
 *	of the ipport_* ranges.
 * td	Calling thread; td->td_proc supplies the credentials for the
 *	privilege and jail checks.
 *
 * Returns 0 on success, otherwise one of:
 *	EADDRNOTAVAIL	no interface addresses are configured, the address
 *			is not found by ifa_ifwithaddr(), prison_ip()
 *			rejects it for a jailed caller, or the automatic
 *			port range is exhausted;
 *	EINVAL		already bound, wrong sa_len, or prison_ip() fails;
 *	EACCES		port below IPPORT_RESERVED and suser_xxx() fails;
 *	EADDRINUSE	a conflicting PCB was found;
 *	EAGAIN		in_pcbinshash() failed.
 * (In the automatic low-port case the error from suser_xxx() is returned
 * as is.)
 *
 * On every failure path taken after inp_laddr has been assigned, the
 * address (and the port, where it was set) is reset before returning.
 *
 * NOTE(review): p is tested for NULL before the suser_xxx()/jailed()
 * calls, yet td and p->p_ucred are dereferenced unconditionally
 * elsewhere in this function -- confirm callers always pass a valid
 * thread.
 */
175 int
176 in_pcbbind(inp, nam, td)
177 	register struct inpcb *inp;
178 	struct sockaddr *nam;
179 	struct thread *td;
180 {
181 	struct proc *p = td->td_proc;
182 	register struct socket *so = inp->inp_socket;
183 	unsigned short *lastport;
184 	struct sockaddr_in *sin;
185 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
186 	u_short lport = 0;
187 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
188 	int error, prison = 0;
189 
190 	if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
191 		return (EADDRNOTAVAIL);
192 	if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
193 		return (EINVAL);
	/*
	 * wild is later handed to in_pcblookup_local() as its wild_okay
	 * argument; it is only set when neither reuse option is on.
	 */
194 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
195 		wild = 1;
196 	if (nam) {
197 		sin = (struct sockaddr_in *)nam;
198 		if (nam->sa_len != sizeof (*sin))
199 			return (EINVAL);
200 #ifdef notdef
201 		/*
202 		 * We should check the family, but old programs
203 		 * incorrectly fail to initialize it.
204 		 */
205 		if (sin->sin_family != AF_INET)
206 			return (EAFNOSUPPORT);
207 #endif
208 		if (sin->sin_addr.s_addr != INADDR_ANY)
209 			if (prison_ip(p->p_ucred, 0, &sin->sin_addr.s_addr))
210 				return(EINVAL);
		/* Remember the requested port before sin_port is cleared below. */
211 		lport = sin->sin_port;
212 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
213 			/*
214 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
215 			 * allow complete duplication of binding if
216 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
217 			 * and a multicast address is bound on both
218 			 * new and duplicated sockets.
219 			 */
220 			if (so->so_options & SO_REUSEADDR)
221 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
222 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
			/*
			 * The caller's sockaddr is modified here and the
			 * port is never written back; the value lives on
			 * in lport only.
			 */
223 			sin->sin_port = 0;		/* yech... */
224 			if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
225 				return (EADDRNOTAVAIL);
226 		}
227 		if (lport) {
228 			struct inpcb *t;
229 			/* GROSS */
230 			if (ntohs(lport) < IPPORT_RESERVED && p &&
231 			    suser_xxx(0, p, PRISON_ROOT))
232 				return (EACCES);
233 			if (p && jailed(p->p_ucred))
234 				prison = 1;
			/*
			 * For sockets whose credential uid is non-zero and a
			 * non-multicast address: refuse the bind if a PCB
			 * owned by a different uid already holds the port,
			 * subject to the address/SO_REUSEPORT conditions
			 * below.
			 */
235 			if (so->so_cred->cr_uid != 0 &&
236 			    !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
237 				t = in_pcblookup_local(inp->inp_pcbinfo,
238 				    sin->sin_addr, lport,
239 				    prison ? 0 :  INPLOOKUP_WILDCARD);
240 				if (t &&
241 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
242 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
243 				     (t->inp_socket->so_options &
244 					 SO_REUSEPORT) == 0) &&
245 				    (so->so_cred->cr_uid !=
246 				     t->inp_socket->so_cred->cr_uid)) {
247 #if defined(INET6)
248 					if (ntohl(sin->sin_addr.s_addr) !=
249 					    INADDR_ANY ||
250 					    ntohl(t->inp_laddr.s_addr) !=
251 					    INADDR_ANY ||
252 					    INP_SOCKAF(so) ==
253 					    INP_SOCKAF(t->inp_socket))
254 #endif /* defined(INET6) */
255 					return (EADDRINUSE);
256 				}
257 			}
258 			if (prison &&
259 			    prison_ip(p->p_ucred, 0, &sin->sin_addr.s_addr))
260 				return (EADDRNOTAVAIL);
			/*
			 * General conflict check: an existing PCB on this
			 * port blocks the bind unless it shares one of the
			 * reuse options collected in reuseport.
			 */
261 			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
262 			    lport, prison ? 0 : wild);
263 			if (t &&
264 			    (reuseport & t->inp_socket->so_options) == 0) {
265 #if defined(INET6)
266 				if (ntohl(sin->sin_addr.s_addr) !=
267 				    INADDR_ANY ||
268 				    ntohl(t->inp_laddr.s_addr) !=
269 				    INADDR_ANY ||
270 				    INP_SOCKAF(so) ==
271 				    INP_SOCKAF(t->inp_socket))
272 #endif /* defined(INET6) */
273 				return (EADDRINUSE);
274 			}
275 		}
276 		inp->inp_laddr = sin->sin_addr;
277 	}
	/* No port requested (or no address given at all): pick one. */
278 	if (lport == 0) {
279 		ushort first, last;
280 		int count;
281 
282 		if (inp->inp_laddr.s_addr != INADDR_ANY)
283 			if (prison_ip(p->p_ucred, 0, &inp->inp_laddr.s_addr )) {
284 				inp->inp_laddr.s_addr = INADDR_ANY;
285 				return (EINVAL);
286 			}
287 		inp->inp_flags |= INP_ANONPORT;
288 
		/*
		 * Select the range and the per-pcbinfo "last port used"
		 * cursor according to the PCB's port-range flags.
		 */
289 		if (inp->inp_flags & INP_HIGHPORT) {
290 			first = ipport_hifirstauto;	/* sysctl */
291 			last  = ipport_hilastauto;
292 			lastport = &pcbinfo->lasthi;
293 		} else if (inp->inp_flags & INP_LOWPORT) {
294 			if (p && (error = suser_xxx(0, p, PRISON_ROOT))) {
295 				inp->inp_laddr.s_addr = INADDR_ANY;
296 				return error;
297 			}
298 			first = ipport_lowfirstauto;	/* 1023 */
299 			last  = ipport_lowlastauto;	/* 600 */
300 			lastport = &pcbinfo->lastlow;
301 		} else {
302 			first = ipport_firstauto;	/* sysctl */
303 			last  = ipport_lastauto;
304 			lastport = &pcbinfo->lastport;
305 		}
306 		/*
307 		 * Simple check to ensure all ports are not used up causing
308 		 * a deadlock here.
309 		 *
310 		 * We split the two cases (up and down) so that the direction
311 		 * is not being tested on each round of the loop.
312 		 */
313 		if (first > last) {
314 			/*
315 			 * counting down
316 			 */
317 			count = first - last;
318 
319 			do {
320 				if (count-- < 0) {	/* completely used? */
321 					inp->inp_laddr.s_addr = INADDR_ANY;
322 					return (EADDRNOTAVAIL);
323 				}
324 				--*lastport;
325 				if (*lastport > first || *lastport < last)
326 					*lastport = first;
327 				lport = htons(*lastport);
328 			} while (in_pcblookup_local(pcbinfo,
329 				 inp->inp_laddr, lport, wild));
330 		} else {
331 			/*
332 			 * counting up
333 			 */
334 			count = last - first;
335 
336 			do {
337 				if (count-- < 0) {	/* completely used? */
338 					/*
339 					 * Undo any address bind that may have
340 					 * occurred above.
341 					 */
342 					inp->inp_laddr.s_addr = INADDR_ANY;
343 					return (EADDRNOTAVAIL);
344 				}
345 				++*lastport;
346 				if (*lastport < first || *lastport > last)
347 					*lastport = first;
348 				lport = htons(*lastport);
349 			} while (in_pcblookup_local(pcbinfo,
350 				 inp->inp_laddr, lport, wild));
351 		}
352 	}
353 	inp->inp_lport = lport;
	/*
	 * Final jail check on the chosen local address, then insert the
	 * PCB into the hash tables; either failure undoes the binding.
	 */
354 	if (prison_ip(p->p_ucred, 0, &inp->inp_laddr.s_addr)) {
355 		inp->inp_laddr.s_addr = INADDR_ANY;
356 		inp->inp_lport = 0;
357 		return(EINVAL);
358 	}
359 	if (in_pcbinshash(inp) != 0) {
360 		inp->inp_laddr.s_addr = INADDR_ANY;
361 		inp->inp_lport = 0;
362 		return (EAGAIN);
363 	}
364 	return (0);
365 }
366 
367 /*
368  *   Transform old in_pcbconnect() into an inner subroutine for new
369  *   in_pcbconnect(): Do some validity-checking on the remote
370  *   address (in mbuf 'nam') and then determine local host address
371  *   (i.e., which interface) to use to access that remote host.
372  *
373  *   This preserves definition of in_pcbconnect(), while supporting a
374  *   slightly different version for T/TCP.  (This is more than
375  *   a bit of a kludge, but cleaning up the internal interfaces would
376  *   have forced minor changes in every protocol).
377  */
378 
379 int
380 in_pcbladdr(inp, nam, plocal_sin)
381 	register struct inpcb *inp;
382 	struct sockaddr *nam;
383 	struct sockaddr_in **plocal_sin;
384 {
385 	struct in_ifaddr *ia;
386 	register struct sockaddr_in *sin = (struct sockaddr_in *)nam;
387 
388 	if (nam->sa_len != sizeof (*sin))
389 		return (EINVAL);
390 	if (sin->sin_family != AF_INET)
391 		return (EAFNOSUPPORT);
392 	if (sin->sin_port == 0)
393 		return (EADDRNOTAVAIL);
394 	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
395 		/*
396 		 * If the destination address is INADDR_ANY,
397 		 * use the primary local address.
398 		 * If the supplied address is INADDR_BROADCAST,
399 		 * and the primary interface supports broadcast,
400 		 * choose the broadcast address for that interface.
401 		 */
402 		if (sin->sin_addr.s_addr == INADDR_ANY)
403 		    sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
404 		else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
405 		  (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST))
406 		    sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr;
407 	}
408 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
409 		register struct route *ro;
410 
411 		ia = (struct in_ifaddr *)0;
412 		/*
413 		 * If route is known or can be allocated now,
414 		 * our src addr is taken from the i/f, else punt.
415 		 */
416 		ro = &inp->inp_route;
417 		if (ro->ro_rt &&
418 		    (satosin(&ro->ro_dst)->sin_addr.s_addr !=
419 			sin->sin_addr.s_addr ||
420 		    inp->inp_socket->so_options & SO_DONTROUTE)) {
421 			RTFREE(ro->ro_rt);
422 			ro->ro_rt = (struct rtentry *)0;
423 		}
424 		if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
425 		    (ro->ro_rt == (struct rtentry *)0 ||
426 		    ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
427 			/* No route yet, so try to acquire one */
428 			ro->ro_dst.sa_family = AF_INET;
429 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
430 			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
431 				sin->sin_addr;
432 			rtalloc(ro);
433 		}
434 		/*
435 		 * If we found a route, use the address
436 		 * corresponding to the outgoing interface
437 		 * unless it is the loopback (in case a route
438 		 * to our address on another net goes to loopback).
439 		 */
440 		if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
441 			ia = ifatoia(ro->ro_rt->rt_ifa);
442 		if (ia == 0) {
443 			u_short fport = sin->sin_port;
444 
445 			sin->sin_port = 0;
446 			ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
447 			if (ia == 0)
448 				ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
449 			sin->sin_port = fport;
450 			if (ia == 0)
451 				ia = TAILQ_FIRST(&in_ifaddrhead);
452 			if (ia == 0)
453 				return (EADDRNOTAVAIL);
454 		}
455 		/*
456 		 * If the destination address is multicast and an outgoing
457 		 * interface has been set as a multicast option, use the
458 		 * address of that interface as our source address.
459 		 */
460 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
461 		    inp->inp_moptions != NULL) {
462 			struct ip_moptions *imo;
463 			struct ifnet *ifp;
464 
465 			imo = inp->inp_moptions;
466 			if (imo->imo_multicast_ifp != NULL) {
467 				ifp = imo->imo_multicast_ifp;
468 				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
469 					if (ia->ia_ifp == ifp)
470 						break;
471 				if (ia == 0)
472 					return (EADDRNOTAVAIL);
473 			}
474 		}
475 	/*
476 	 * Don't do pcblookup call here; return interface in plocal_sin
477 	 * and exit to caller, that will do the lookup.
478 	 */
479 		*plocal_sin = &ia->ia_addr;
480 
481 	}
482 	return(0);
483 }
484 
485 /*
486  * Outer subroutine:
487  * Connect from a socket to a specified address.
488  * Both address and port must be specified in argument sin.
489  * If don't have a local address for this socket yet,
490  * then pick one.
491  */
492 int
493 in_pcbconnect(inp, nam, td)
494 	register struct inpcb *inp;
495 	struct sockaddr *nam;
496 	struct thread *td;
497 {
498 	struct sockaddr_in *ifaddr;
499 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
500 	struct sockaddr_in sa;
501 	struct ucred *cred;
502 	int error;
503 
504 	cred = inp->inp_socket->so_cred;
505 	if (inp->inp_laddr.s_addr == INADDR_ANY && jailed(cred)) {
506 		bzero(&sa, sizeof (sa));
507 		sa.sin_addr.s_addr = htonl(cred->cr_prison->pr_ip);
508 		sa.sin_len=sizeof (sa);
509 		sa.sin_family = AF_INET;
510 		error = in_pcbbind(inp, (struct sockaddr *)&sa, td);
511 		if (error)
512 		    return (error);
513 	}
514 	/*
515 	 *   Call inner routine, to assign local interface address.
516 	 */
517 	if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
518 		return(error);
519 
520 	if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
521 	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
522 	    inp->inp_lport, 0, NULL) != NULL) {
523 		return (EADDRINUSE);
524 	}
525 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
526 		if (inp->inp_lport == 0) {
527 			error = in_pcbbind(inp, (struct sockaddr *)0, td);
528 			if (error)
529 				return (error);
530 		}
531 		inp->inp_laddr = ifaddr->sin_addr;
532 	}
533 	inp->inp_faddr = sin->sin_addr;
534 	inp->inp_fport = sin->sin_port;
535 	in_pcbrehash(inp);
536 	return (0);
537 }
538 
539 void
540 in_pcbdisconnect(inp)
541 	struct inpcb *inp;
542 {
543 
544 	inp->inp_faddr.s_addr = INADDR_ANY;
545 	inp->inp_fport = 0;
546 	in_pcbrehash(inp);
547 	if (inp->inp_socket->so_state & SS_NOFDREF)
548 		in_pcbdetach(inp);
549 }
550 
551 void
552 in_pcbdetach(inp)
553 	struct inpcb *inp;
554 {
555 	struct socket *so = inp->inp_socket;
556 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
557 	struct rtentry *rt  = inp->inp_route.ro_rt;
558 
559 #ifdef IPSEC
560 	ipsec4_delete_pcbpolicy(inp);
561 #endif /*IPSEC*/
562 	inp->inp_gencnt = ++ipi->ipi_gencnt;
563 	in_pcbremlists(inp);
564 	so->so_pcb = 0;
565 	sofree(so);
566 	if (inp->inp_options)
567 		(void)m_free(inp->inp_options);
568 	if (rt) {
569 		/*
570 		 * route deletion requires reference count to be <= zero
571 		 */
572 		if ((rt->rt_flags & RTF_DELCLONE) &&
573 		    (rt->rt_flags & RTF_WASCLONED) &&
574 		    (rt->rt_refcnt <= 1)) {
575 			rt->rt_refcnt--;
576 			rt->rt_flags &= ~RTF_UP;
577 			rtrequest(RTM_DELETE, rt_key(rt),
578 				  rt->rt_gateway, rt_mask(rt),
579 				  rt->rt_flags, (struct rtentry **)0);
580 		}
581 		else
582 			rtfree(rt);
583 	}
584 	ip_freemoptions(inp->inp_moptions);
585 	inp->inp_vflag = 0;
586 	zfree(ipi->ipi_zone, inp);
587 }
588 
589 /*
590  * The calling convention of in_setsockaddr() and in_setpeeraddr() was
591  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
592  * in struct pr_usrreqs, so that protocols can just reference them directly
593  * without the need for a wrapper function.  The socket must have a valid
594  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
595  * except through a kernel programming error, so it is acceptable to panic
596  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
597  * because there actually /is/ a programming error somewhere... XXX)
598  */
599 int
600 in_setsockaddr(so, nam)
601 	struct socket *so;
602 	struct sockaddr **nam;
603 {
604 	int s;
605 	register struct inpcb *inp;
606 	register struct sockaddr_in *sin;
607 
608 	/*
609 	 * Do the malloc first in case it blocks.
610 	 */
611 	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
612 		M_WAITOK | M_ZERO);
613 	sin->sin_family = AF_INET;
614 	sin->sin_len = sizeof(*sin);
615 
616 	s = splnet();
617 	inp = sotoinpcb(so);
618 	if (!inp) {
619 		splx(s);
620 		free(sin, M_SONAME);
621 		return ECONNRESET;
622 	}
623 	sin->sin_port = inp->inp_lport;
624 	sin->sin_addr = inp->inp_laddr;
625 	splx(s);
626 
627 	*nam = (struct sockaddr *)sin;
628 	return 0;
629 }
630 
631 int
632 in_setpeeraddr(so, nam)
633 	struct socket *so;
634 	struct sockaddr **nam;
635 {
636 	int s;
637 	struct inpcb *inp;
638 	register struct sockaddr_in *sin;
639 
640 	/*
641 	 * Do the malloc first in case it blocks.
642 	 */
643 	MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
644 		M_WAITOK | M_ZERO);
645 	sin->sin_family = AF_INET;
646 	sin->sin_len = sizeof(*sin);
647 
648 	s = splnet();
649 	inp = sotoinpcb(so);
650 	if (!inp) {
651 		splx(s);
652 		free(sin, M_SONAME);
653 		return ECONNRESET;
654 	}
655 	sin->sin_port = inp->inp_fport;
656 	sin->sin_addr = inp->inp_faddr;
657 	splx(s);
658 
659 	*nam = (struct sockaddr *)sin;
660 	return 0;
661 }
662 
663 void
664 in_pcbnotifyall(head, faddr, errno, notify)
665 	struct inpcbhead *head;
666 	struct in_addr faddr;
667 	int errno;
668 	void (*notify) __P((struct inpcb *, int));
669 {
670 	struct inpcb *inp, *ninp;
671 	int s;
672 
673 	s = splnet();
674 	for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
675 		ninp = LIST_NEXT(inp, inp_list);
676 #ifdef INET6
677 		if ((inp->inp_vflag & INP_IPV4) == 0)
678 			continue;
679 #endif
680 		if (inp->inp_faddr.s_addr != faddr.s_addr ||
681 		    inp->inp_socket == NULL)
682 				continue;
683 		(*notify)(inp, errno);
684 	}
685 	splx(s);
686 }
687 
688 void
689 in_pcbpurgeif0(head, ifp)
690 	struct inpcb *head;
691 	struct ifnet *ifp;
692 {
693 	struct inpcb *inp;
694 	struct ip_moptions *imo;
695 	int i, gap;
696 
697 	for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
698 		imo = inp->inp_moptions;
699 		if ((inp->inp_vflag & INP_IPV4) &&
700 		    imo != NULL) {
701 			/*
702 			 * Unselect the outgoing interface if it is being
703 			 * detached.
704 			 */
705 			if (imo->imo_multicast_ifp == ifp)
706 				imo->imo_multicast_ifp = NULL;
707 
708 			/*
709 			 * Drop multicast group membership if we joined
710 			 * through the interface being detached.
711 			 */
712 			for (i = 0, gap = 0; i < imo->imo_num_memberships;
713 			    i++) {
714 				if (imo->imo_membership[i]->inm_ifp == ifp) {
715 					in_delmulti(imo->imo_membership[i]);
716 					gap++;
717 				} else if (gap != 0)
718 					imo->imo_membership[i - gap] =
719 					    imo->imo_membership[i];
720 			}
721 			imo->imo_num_memberships -= gap;
722 		}
723 	}
724 }
725 
726 /*
727  * Check for alternatives when higher level complains
728  * about service problems.  For now, invalidate cached
729  * routing information.  If the route was created dynamically
730  * (by a redirect), time to try a default gateway again.
731  */
732 void
733 in_losing(inp)
734 	struct inpcb *inp;
735 {
736 	register struct rtentry *rt;
737 	struct rt_addrinfo info;
738 
739 	if ((rt = inp->inp_route.ro_rt)) {
740 		bzero((caddr_t)&info, sizeof(info));
741 		info.rti_flags = rt->rt_flags;
742 		info.rti_info[RTAX_DST] = rt_key(rt);
743 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
744 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
745 		rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
746 		if (rt->rt_flags & RTF_DYNAMIC)
747 			(void) rtrequest1(RTM_DELETE, &info, NULL);
748 		inp->inp_route.ro_rt = NULL;
749 		rtfree(rt);
750 		/*
751 		 * A new route can be allocated
752 		 * the next time output is attempted.
753 		 */
754 	}
755 }
756 
757 /*
758  * After a routing change, flush old routing
759  * and allocate a (hopefully) better one.
760  */
761 void
762 in_rtchange(inp, errno)
763 	register struct inpcb *inp;
764 	int errno;
765 {
766 	if (inp->inp_route.ro_rt) {
767 		rtfree(inp->inp_route.ro_rt);
768 		inp->inp_route.ro_rt = 0;
769 		/*
770 		 * A new route can be allocated the next time
771 		 * output is attempted.
772 		 */
773 	}
774 }
775 
776 /*
777  * Lookup a PCB based on the local address and port.
778  */
779 struct inpcb *
780 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
781 	struct inpcbinfo *pcbinfo;
782 	struct in_addr laddr;
783 	u_int lport_arg;
784 	int wild_okay;
785 {
786 	register struct inpcb *inp;
787 	int matchwild = 3, wildcard;
788 	u_short lport = lport_arg;
789 
790 	if (!wild_okay) {
791 		struct inpcbhead *head;
792 		/*
793 		 * Look for an unconnected (wildcard foreign addr) PCB that
794 		 * matches the local address and port we're looking for.
795 		 */
796 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
797 		LIST_FOREACH(inp, head, inp_hash) {
798 #ifdef INET6
799 			if ((inp->inp_vflag & INP_IPV4) == 0)
800 				continue;
801 #endif
802 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
803 			    inp->inp_laddr.s_addr == laddr.s_addr &&
804 			    inp->inp_lport == lport) {
805 				/*
806 				 * Found.
807 				 */
808 				return (inp);
809 			}
810 		}
811 		/*
812 		 * Not found.
813 		 */
814 		return (NULL);
815 	} else {
816 		struct inpcbporthead *porthash;
817 		struct inpcbport *phd;
818 		struct inpcb *match = NULL;
819 		/*
820 		 * Best fit PCB lookup.
821 		 *
822 		 * First see if this local port is in use by looking on the
823 		 * port hash list.
824 		 */
825 		porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
826 		    pcbinfo->porthashmask)];
827 		LIST_FOREACH(phd, porthash, phd_hash) {
828 			if (phd->phd_port == lport)
829 				break;
830 		}
831 		if (phd != NULL) {
832 			/*
833 			 * Port is in use by one or more PCBs. Look for best
834 			 * fit.
835 			 */
836 			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
837 				wildcard = 0;
838 #ifdef INET6
839 				if ((inp->inp_vflag & INP_IPV4) == 0)
840 					continue;
841 #endif
842 				if (inp->inp_faddr.s_addr != INADDR_ANY)
843 					wildcard++;
844 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
845 					if (laddr.s_addr == INADDR_ANY)
846 						wildcard++;
847 					else if (inp->inp_laddr.s_addr != laddr.s_addr)
848 						continue;
849 				} else {
850 					if (laddr.s_addr != INADDR_ANY)
851 						wildcard++;
852 				}
853 				if (wildcard < matchwild) {
854 					match = inp;
855 					matchwild = wildcard;
856 					if (matchwild == 0) {
857 						break;
858 					}
859 				}
860 			}
861 		}
862 		return (match);
863 	}
864 }
865 
866 /*
867  * Lookup PCB in hash list.
868  */
869 struct inpcb *
870 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard,
871 		  ifp)
872 	struct inpcbinfo *pcbinfo;
873 	struct in_addr faddr, laddr;
874 	u_int fport_arg, lport_arg;
875 	int wildcard;
876 	struct ifnet *ifp;
877 {
878 	struct inpcbhead *head;
879 	register struct inpcb *inp;
880 	u_short fport = fport_arg, lport = lport_arg;
881 
882 	/*
883 	 * First look for an exact match.
884 	 */
885 	head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
886 	LIST_FOREACH(inp, head, inp_hash) {
887 #ifdef INET6
888 		if ((inp->inp_vflag & INP_IPV4) == 0)
889 			continue;
890 #endif
891 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
892 		    inp->inp_laddr.s_addr == laddr.s_addr &&
893 		    inp->inp_fport == fport &&
894 		    inp->inp_lport == lport) {
895 			/*
896 			 * Found.
897 			 */
898 			return (inp);
899 		}
900 	}
901 	if (wildcard) {
902 		struct inpcb *local_wild = NULL;
903 #if defined(INET6)
904 		struct inpcb *local_wild_mapped = NULL;
905 #endif /* defined(INET6) */
906 
907 		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
908 		LIST_FOREACH(inp, head, inp_hash) {
909 #ifdef INET6
910 			if ((inp->inp_vflag & INP_IPV4) == 0)
911 				continue;
912 #endif
913 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
914 			    inp->inp_lport == lport) {
915 				if (ifp && ifp->if_type == IFT_FAITH &&
916 				    (inp->inp_flags & INP_FAITH) == 0)
917 					continue;
918 				if (inp->inp_laddr.s_addr == laddr.s_addr)
919 					return (inp);
920 				else if (inp->inp_laddr.s_addr == INADDR_ANY) {
921 #if defined(INET6)
922 					if (INP_CHECK_SOCKAF(inp->inp_socket,
923 							     AF_INET6))
924 						local_wild_mapped = inp;
925 					else
926 #endif /* defined(INET6) */
927 					local_wild = inp;
928 				}
929 			}
930 		}
931 #if defined(INET6)
932 		if (local_wild == NULL)
933 			return (local_wild_mapped);
934 #endif /* defined(INET6) */
935 		return (local_wild);
936 	}
937 
938 	/*
939 	 * Not found.
940 	 */
941 	return (NULL);
942 }
943 
944 /*
945  * Insert PCB onto various hash lists.
946  */
947 int
948 in_pcbinshash(inp)
949 	struct inpcb *inp;
950 {
951 	struct inpcbhead *pcbhash;
952 	struct inpcbporthead *pcbporthash;
953 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
954 	struct inpcbport *phd;
955 	u_int32_t hashkey_faddr;
956 
957 #ifdef INET6
958 	if (inp->inp_vflag & INP_IPV6)
959 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
960 	else
961 #endif /* INET6 */
962 	hashkey_faddr = inp->inp_faddr.s_addr;
963 
964 	pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
965 		 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
966 
967 	pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
968 	    pcbinfo->porthashmask)];
969 
970 	/*
971 	 * Go through port list and look for a head for this lport.
972 	 */
973 	LIST_FOREACH(phd, pcbporthash, phd_hash) {
974 		if (phd->phd_port == inp->inp_lport)
975 			break;
976 	}
977 	/*
978 	 * If none exists, malloc one and tack it on.
979 	 */
980 	if (phd == NULL) {
981 		MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT);
982 		if (phd == NULL) {
983 			return (ENOBUFS); /* XXX */
984 		}
985 		phd->phd_port = inp->inp_lport;
986 		LIST_INIT(&phd->phd_pcblist);
987 		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
988 	}
989 	inp->inp_phd = phd;
990 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
991 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
992 	return (0);
993 }
994 
995 /*
996  * Move PCB to the proper hash bucket when { faddr, fport } have  been
997  * changed. NOTE: This does not handle the case of the lport changing (the
998  * hashed port list would have to be updated as well), so the lport must
999  * not change after in_pcbinshash() has been called.
1000  */
1001 void
1002 in_pcbrehash(inp)
1003 	struct inpcb *inp;
1004 {
1005 	struct inpcbhead *head;
1006 	u_int32_t hashkey_faddr;
1007 
1008 #ifdef INET6
1009 	if (inp->inp_vflag & INP_IPV6)
1010 		hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1011 	else
1012 #endif /* INET6 */
1013 	hashkey_faddr = inp->inp_faddr.s_addr;
1014 
1015 	head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
1016 		inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)];
1017 
1018 	LIST_REMOVE(inp, inp_hash);
1019 	LIST_INSERT_HEAD(head, inp, inp_hash);
1020 }
1021 
1022 /*
1023  * Remove PCB from various lists.
1024  */
1025 void
1026 in_pcbremlists(inp)
1027 	struct inpcb *inp;
1028 {
1029 	inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1030 	if (inp->inp_lport) {
1031 		struct inpcbport *phd = inp->inp_phd;
1032 
1033 		LIST_REMOVE(inp, inp_hash);
1034 		LIST_REMOVE(inp, inp_portlist);
1035 		if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1036 			LIST_REMOVE(phd, phd_hash);
1037 			free(phd, M_PCB);
1038 		}
1039 	}
1040 	LIST_REMOVE(inp, inp_list);
1041 	inp->inp_pcbinfo->ipi_count--;
1042 }
1043 
1044 int
1045 prison_xinpcb(struct proc *p, struct inpcb *inp)
1046 {
1047 	if (!jailed(p->p_ucred))
1048 		return (0);
1049 	if (ntohl(inp->inp_laddr.s_addr) == p->p_ucred->cr_prison->pr_ip)
1050 		return (0);
1051 	return (1);
1052 }
1053