xref: /freebsd/sys/netinet/ip_divert.c (revision 4a0f765fbf09711e612e86fce8bb09ec43f482d9)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	$Id$
34  */
35 
36 #include <sys/param.h>
37 #include <sys/queue.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/socket.h>
41 #include <sys/protosw.h>
42 #include <sys/socketvar.h>
43 #include <sys/errno.h>
44 #include <sys/systm.h>
45 
46 #include <net/if.h>
47 #include <net/route.h>
48 
49 #include <netinet/in.h>
50 #include <netinet/in_systm.h>
51 #include <netinet/ip.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/in_var.h>
54 #include <netinet/ip_var.h>
55 
56 /*
57  * Divert sockets
58  */
59 
60 /*
61  * Allocate enough space to hold a full IP packet
62  */
63 #define	DIVSNDQ		(65536 + 100)
64 #define	DIVRCVQ		(65536 + 100)
65 
66 /* Global variables */
67 
68 /*
69  * ip_input() and ip_output() set this secret value before calling us to
70  * let us know which divert port to divert a packet to; this is done so
71  * we can use the existing prototype for struct protosw's pr_input().
72  * This is stored in host order.
73  */
74 u_short ip_divert_port;
75 
76 /*
77  * We set this value to a non-zero port number when we want the call to
78  * ip_fw_chk() in ip_input() or ip_output() to ignore ``divert <port>''
79  * chain entries. This is stored in host order.
80  */
81 u_short ip_divert_ignore;
82 
83 /* Internal variables */
84 
85 static struct inpcbhead divcb;
86 static struct inpcbinfo divcbinfo;
87 
88 static u_long	div_sendspace = DIVSNDQ;	/* XXX sysctl ? */
89 static u_long	div_recvspace = DIVRCVQ;	/* XXX sysctl ? */
90 
91 /* Optimization: have this preinitialized */
92 static struct sockaddr_in divsrc = { sizeof(divsrc), AF_INET };
93 
94 /* Internal functions */
95 
96 static int div_output(struct socket *so,
97 		struct mbuf *m, struct mbuf *addr, struct mbuf *control);
98 
99 /*
100  * Initialize divert connection block queue.
101  */
102 void
103 div_init(void)
104 {
105 	LIST_INIT(&divcb);
106 	divcbinfo.listhead = &divcb;
107 	/*
108 	 * XXX We don't use the hash list for divert IP, but it's easier
109 	 * to allocate a one entry hash list than it is to check all
110 	 * over the place for hashbase == NULL.
111 	 */
112 	divcbinfo.hashbase = phashinit(1, M_PCB, &divcbinfo.hashsize);
113 }
114 
115 /*
116  * Setup generic address and protocol structures
117  * for div_input routine, then pass them along with
118  * mbuf chain. ip->ip_len is assumed to have had
119  * the header length (hlen) subtracted out already.
120  * We tell whether the packet was incoming or outgoing
121  * by seeing if hlen == 0, which is a hack.
122  */
123 void
124 div_input(struct mbuf *m, int hlen)
125 {
126 	register struct ip *ip = mtod(m, struct ip *);
127 	register struct inpcb *inp;
128 	register struct socket *sa;
129 
130 	/* Sanity check */
131 	if (ip_divert_port == 0)
132 		panic("div_input");
133 
134 	/* Record divert port */
135 	divsrc.sin_port = htons(ip_divert_port);
136 
137 	/* Restore packet header fields */
138 	ip->ip_len += hlen;
139 	HTONS(ip->ip_len);
140 	HTONS(ip->ip_off);
141 
142 	/* Record receive interface address, if any */
143 	divsrc.sin_addr.s_addr = 0;
144 	if (hlen) {
145 		struct ifaddr *ifa;
146 
147 		/* More fields affected by ip_input() */
148 		HTONS(ip->ip_id);
149 
150 		/* Find IP address for recieve interface */
151 		for (ifa = m->m_pkthdr.rcvif->if_addrhead.tqh_first;
152 		    ifa != NULL; ifa = ifa->ifa_link.tqe_next) {
153 			if (ifa->ifa_addr == NULL)
154 				continue;
155 			if (ifa->ifa_addr->sa_family != AF_INET)
156 				continue;
157 			divsrc.sin_addr =
158 			    ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr;
159 			break;
160 		}
161 	}
162 
163 	/* Put packet on socket queue, if any */
164 	sa = NULL;
165 	for (inp = divcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
166 		if (inp->inp_lport == htons(ip_divert_port))
167 			sa = inp->inp_socket;
168 	}
169 	if (sa) {
170 		if (sbappendaddr(&sa->so_rcv, (struct sockaddr *)&divsrc,
171 				m, (struct mbuf *)0) == 0)
172 			m_freem(m);
173 		else
174 			sorwakeup(sa);
175 	} else {
176 		m_freem(m);
177 		ipstat.ips_noproto++;
178 		ipstat.ips_delivered--;
179         }
180 }
181 
182 /*
183  * Deliver packet back into the IP processing machinery.
184  *
185  * If no address specified, or address is 0.0.0.0, send to ip_output();
186  * otherwise, send to ip_input() and mark as having been received on
187  * the interface with that address.
188  *
189  * If no address specified, or dest port is 0, allow packet to divert
190  * back to this socket; otherwise, don't.
191  */
192 static int
193 div_output(so, m, addr, control)
194 	struct socket *so;
195 	register struct mbuf *m;
196 	struct mbuf *addr, *control;
197 {
198 	register struct inpcb *const inp = sotoinpcb(so);
199 	register struct ip *const ip = mtod(m, struct ip *);
200 	struct sockaddr_in *sin = NULL;
201 	int error = 0;
202 
203 	if (control)
204 		m_freem(control);		/* XXX */
205 	if (addr)
206 		sin = mtod(addr, struct sockaddr_in *);
207 
208 	/* Loopback avoidance option */
209 	ip_divert_ignore = ntohs(inp->inp_lport);
210 
211 	/* Reinject packet into the system as incoming or outgoing */
212 	if (!sin || sin->sin_addr.s_addr == 0) {
213 		/* Don't allow both user specified and setsockopt options,
214 		   and don't allow packet length sizes that will crash */
215 		if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) ||
216 		     ((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
217 			error = EINVAL;
218 			goto cantsend;
219 		}
220 
221 		/* Convert fields to host order for ip_output() */
222 		NTOHS(ip->ip_len);
223 		NTOHS(ip->ip_off);
224 
225 		/* Send packet to output processing */
226 		ipstat.ips_rawout++;			/* XXX */
227 		error = ip_output(m, inp->inp_options, &inp->inp_route,
228 			(so->so_options & SO_DONTROUTE) |
229 			IP_ALLOWBROADCAST | IP_RAWOUTPUT, inp->inp_moptions);
230 	} else {
231 		struct ifaddr *ifa;
232 
233 		/* Find receive interface with the given IP address */
234 		sin->sin_port = 0;
235 		if ((ifa = ifa_ifwithaddr((struct sockaddr *) sin)) == 0) {
236 			error = EADDRNOTAVAIL;
237 			goto cantsend;
238 		}
239 		m->m_pkthdr.rcvif = ifa->ifa_ifp;
240 
241 		/* Send packet to input processing */
242 		ip_input(m);
243 	}
244 
245 	/* Reset for next time (and other packets) */
246 	ip_divert_ignore = 0;
247 	return error;
248 
249 cantsend:
250 	ip_divert_ignore = 0;
251 	m_freem(m);
252 	return error;
253 }
254 
255 /*ARGSUSED*/
256 int
257 div_usrreq(so, req, m, nam, control)
258 	register struct socket *so;
259 	int req;
260 	struct mbuf *m, *nam, *control;
261 {
262 	register int error = 0;
263 	register struct inpcb *inp = sotoinpcb(so);
264 	int s = 0;
265 
266 	if (inp == NULL && req != PRU_ATTACH) {
267 		error = EINVAL;
268 		goto release;
269 	}
270 	switch (req) {
271 
272 	case PRU_ATTACH:
273 		if (inp)
274 			panic("div_attach");
275 		if ((so->so_state & SS_PRIV) == 0) {
276 			error = EACCES;
277 			break;
278 		}
279 		if ((error = soreserve(so, div_sendspace, div_recvspace)) ||
280 		    (error = in_pcballoc(so, &divcbinfo)))
281 			break;
282 		inp = (struct inpcb *)so->so_pcb;
283 		inp->inp_ip.ip_p = (int)nam;	/* XXX */
284 		inp->inp_flags |= INP_HDRINCL;
285 		/* The socket is always "connected" because
286 		   we always know "where" to send the packet */
287 		so->so_state |= SS_ISCONNECTED;
288 		break;
289 
290 	case PRU_DISCONNECT:
291 		if ((so->so_state & SS_ISCONNECTED) == 0) {
292 			error = ENOTCONN;
293 			break;
294 		}
295 		/* FALLTHROUGH */
296 	case PRU_ABORT:
297 		soisdisconnected(so);
298 		/* FALLTHROUGH */
299 	case PRU_DETACH:
300 		if (inp == 0)
301 			panic("div_detach");
302 		in_pcbdetach(inp);
303 		break;
304 
305 	case PRU_BIND:
306 		s = splnet();
307 		error = in_pcbbind(inp, nam);
308 		splx(s);
309 		break;
310 
311 	/*
312 	 * Mark the connection as being incapable of further input.
313 	 */
314 	case PRU_SHUTDOWN:
315 		socantsendmore(so);
316 		break;
317 
318 	case PRU_SEND:
319 		/* Packet must have a header (but that's about it) */
320 		if (m->m_len < sizeof (struct ip) ||
321 		    (m = m_pullup(m, sizeof (struct ip))) == 0) {
322 			ipstat.ips_toosmall++;
323 			error = EINVAL;
324 			break;
325 		}
326 
327 		/* Send packet */
328 		error = div_output(so, m, nam, control);
329 		m = NULL;
330 		break;
331 
332 	case PRU_SOCKADDR:
333 		in_setsockaddr(so, nam);
334 		break;
335 
336 	case PRU_SENSE:
337 		/*
338 		 * stat: don't bother with a blocksize.
339 		 */
340 		return (0);
341 
342 	/*
343 	 * Not supported.
344 	 */
345 	case PRU_CONNECT:
346 	case PRU_CONNECT2:
347 	case PRU_CONTROL:
348 	case PRU_RCVOOB:
349 	case PRU_RCVD:
350 	case PRU_LISTEN:
351 	case PRU_ACCEPT:
352 	case PRU_SENDOOB:
353 	case PRU_PEERADDR:
354 		error = EOPNOTSUPP;
355 		break;
356 
357 	default:
358 		panic("div_usrreq");
359 	}
360 release:
361 	if (m)
362 		m_freem(m);
363 	return (error);
364 }
365