xref: /freebsd/sys/netinet/raw_ip.c (revision 4a0f765fbf09711e612e86fce8bb09ec43f482d9)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
34  *	$Id: raw_ip.c,v 1.41 1997/02/13 19:46:45 wollman Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/errno.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/protosw.h>
44 #include <sys/queue.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/sysctl.h>
48 
49 #include <net/if.h>
50 #include <net/route.h>
51 
52 #define _IP_VHL
53 #include <netinet/in.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/ip.h>
56 #include <netinet/in_pcb.h>
57 #include <netinet/in_var.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/ip_mroute.h>
60 
61 #include <netinet/ip_fw.h>
62 
63 #if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
64 #undef COMPAT_IPFW
65 #define COMPAT_IPFW 1
66 #else
67 #undef COMPAT_IPFW
68 #endif
69 
70 static struct inpcbhead ripcb;
71 static struct inpcbinfo ripcbinfo;
72 
73 /*
74  * Nominal space allocated to a raw ip socket.
75  */
76 #define	RIPSNDQ		8192
77 #define	RIPRCVQ		8192
78 
79 /*
80  * Raw interface to IP protocol.
81  */
82 
83 /*
84  * Initialize raw connection block q.
85  */
86 void
87 rip_init()
88 {
89 	LIST_INIT(&ripcb);
90 	ripcbinfo.listhead = &ripcb;
91 	/*
92 	 * XXX We don't use the hash list for raw IP, but it's easier
93 	 * to allocate a one entry hash list than it is to check all
94 	 * over the place for hashbase == NULL.
95 	 */
96 	ripcbinfo.hashbase = phashinit(1, M_PCB, &ripcbinfo.hashsize);
97 }
98 
99 static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
100 /*
101  * Setup generic address and protocol structures
102  * for raw_input routine, then pass them along with
103  * mbuf chain.
104  */
105 void
106 rip_input(m, iphlen)
107 	struct mbuf *m;
108 	int iphlen;
109 {
110 	register struct ip *ip = mtod(m, struct ip *);
111 	register struct inpcb *inp;
112 	struct inpcb *last = 0;
113 	struct mbuf *opts = 0;
114 
115 	ripsrc.sin_addr = ip->ip_src;
116 	for (inp = ripcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
117 		if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p)
118 			continue;
119 		if (inp->inp_laddr.s_addr &&
120                   inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
121 			continue;
122 		if (inp->inp_faddr.s_addr &&
123                   inp->inp_faddr.s_addr != ip->ip_src.s_addr)
124 			continue;
125 		if (last) {
126 			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
127 			if (n) {
128 				if (last->inp_flags & INP_CONTROLOPTS ||
129 				    last->inp_socket->so_options & SO_TIMESTAMP)
130 				    ip_savecontrol(last, &opts, ip, n);
131 				if (sbappendaddr(&last->inp_socket->so_rcv,
132 				    (struct sockaddr *)&ripsrc, n,
133 				    opts) == 0) {
134 					/* should notify about lost packet */
135 					m_freem(n);
136 					if (opts)
137 					    m_freem(opts);
138 				} else
139 					sorwakeup(last->inp_socket);
140 				opts = 0;
141 			}
142 		}
143 		last = inp;
144 	}
145 	if (last) {
146 		if (last->inp_flags & INP_CONTROLOPTS ||
147 		    last->inp_socket->so_options & SO_TIMESTAMP)
148 			ip_savecontrol(last, &opts, ip, m);
149 		if (sbappendaddr(&last->inp_socket->so_rcv,
150 		    (struct sockaddr *)&ripsrc, m, opts) == 0) {
151 			m_freem(m);
152 			if (opts)
153 			    m_freem(opts);
154 		} else
155 			sorwakeup(last->inp_socket);
156 	} else {
157 		m_freem(m);
158               ipstat.ips_noproto++;
159               ipstat.ips_delivered--;
160       }
161 }
162 
163 /*
164  * Generate IP header and pass packet to ip_output.
165  * Tack on options user may have setup with control call.
166  */
167 int
168 rip_output(m, so, dst)
169 	register struct mbuf *m;
170 	struct socket *so;
171 	u_long dst;
172 {
173 	register struct ip *ip;
174 	register struct inpcb *inp = sotoinpcb(so);
175 	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
176 
177 	/*
178 	 * If the user handed us a complete IP packet, use it.
179 	 * Otherwise, allocate an mbuf for a header and fill it in.
180 	 */
181 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
182 		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
183 			m_freem(m);
184 			return(EMSGSIZE);
185 		}
186 		M_PREPEND(m, sizeof(struct ip), M_WAIT);
187 		ip = mtod(m, struct ip *);
188 		ip->ip_tos = 0;
189 		ip->ip_off = 0;
190 		ip->ip_p = inp->inp_ip.ip_p;
191 		ip->ip_len = m->m_pkthdr.len;
192 		ip->ip_src = inp->inp_laddr;
193 		ip->ip_dst.s_addr = dst;
194 		ip->ip_ttl = MAXTTL;
195 	} else {
196 		if (m->m_pkthdr.len > IP_MAXPACKET) {
197 			m_freem(m);
198 			return(EMSGSIZE);
199 		}
200 		ip = mtod(m, struct ip *);
201 		/* don't allow both user specified and setsockopt options,
202 		   and don't allow packet length sizes that will crash */
203 		if (((IP_VHL_HL(ip->ip_vhl) != (sizeof (*ip) >> 2))
204 		     && inp->inp_options)
205 		    || (ip->ip_len > m->m_pkthdr.len)) {
206 			m_freem(m);
207 			return EINVAL;
208 		}
209 		if (ip->ip_id == 0)
210 			ip->ip_id = htons(ip_id++);
211 		/* XXX prevent ip_output from overwriting header fields */
212 		flags |= IP_RAWOUTPUT;
213 		ipstat.ips_rawout++;
214 	}
215 	return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
216 			  inp->inp_moptions));
217 }
218 
219 /*
220  * Raw IP socket option processing.
221  */
222 int
223 rip_ctloutput(op, so, level, optname, m)
224 	int op;
225 	struct socket *so;
226 	int level, optname;
227 	struct mbuf **m;
228 {
229 	register struct inpcb *inp = sotoinpcb(so);
230 	register int error;
231 
232 	if (level != IPPROTO_IP) {
233 		if (op == PRCO_SETOPT && *m)
234 			(void)m_free(*m);
235 		return (EINVAL);
236 	}
237 
238 	switch (optname) {
239 
240 	case IP_HDRINCL:
241 		error = 0;
242 		if (op == PRCO_SETOPT) {
243 			if (m == 0 || *m == 0 || (*m)->m_len < sizeof (int))
244 				error = EINVAL;
245 			else if (*mtod(*m, int *))
246 				inp->inp_flags |= INP_HDRINCL;
247 			else
248 				inp->inp_flags &= ~INP_HDRINCL;
249 			if (*m)
250 				(void)m_free(*m);
251 		} else {
252 			*m = m_get(M_WAIT, MT_SOOPTS);
253 			(*m)->m_len = sizeof (int);
254 			*mtod(*m, int *) = inp->inp_flags & INP_HDRINCL;
255 		}
256 		return (error);
257 
258 #ifdef COMPAT_IPFW
259 	case IP_FW_GET:
260 		if (ip_fw_ctl_ptr == NULL || op == PRCO_SETOPT) {
261 			if (*m) (void)m_free(*m);
262 			return(EINVAL);
263 		}
264 		return (*ip_fw_ctl_ptr)(optname, m);
265 
266 	case IP_FW_ADD:
267 	case IP_FW_DEL:
268 	case IP_FW_FLUSH:
269 	case IP_FW_ZERO:
270 		if (ip_fw_ctl_ptr == NULL || op != PRCO_SETOPT) {
271 			if (*m) (void)m_free(*m);
272 			return(EINVAL);
273 		}
274 		return (*ip_fw_ctl_ptr)(optname, m);
275 
276 	case IP_NAT:
277 		if (ip_nat_ctl_ptr == NULL) {
278 			if (*m) (void)m_free(*m);
279 			return(EINVAL);
280 		}
281 		return (*ip_nat_ctl_ptr)(op, m);
282 
283 #endif
284 	case IP_RSVP_ON:
285 		return ip_rsvp_init(so);
286 		break;
287 
288 	case IP_RSVP_OFF:
289 		return ip_rsvp_done();
290 		break;
291 
292 	case IP_RSVP_VIF_ON:
293 		return ip_rsvp_vif_init(so, *m);
294 
295 	case IP_RSVP_VIF_OFF:
296 		return ip_rsvp_vif_done(so, *m);
297 
298 	case MRT_INIT:
299 	case MRT_DONE:
300 	case MRT_ADD_VIF:
301 	case MRT_DEL_VIF:
302 	case MRT_ADD_MFC:
303 	case MRT_DEL_MFC:
304 	case MRT_VERSION:
305 	case MRT_ASSERT:
306 		if (op == PRCO_SETOPT) {
307 			error = ip_mrouter_set(optname, so, *m);
308 			if (*m)
309 				(void)m_free(*m);
310 		} else if (op == PRCO_GETOPT) {
311 			error = ip_mrouter_get(optname, so, m);
312 		} else
313 			error = EINVAL;
314 		return (error);
315 	}
316 	return (ip_ctloutput(op, so, level, optname, m));
317 }
318 
319 /*
320  * This function exists solely to receive the PRC_IFDOWN messages which
321  * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
322  * and calls in_ifadown() to remove all routes corresponding to that address.
323  * It also receives the PRC_IFUP messages from if_up() and reinstalls the
324  * interface routes.
325  */
326 void
327 rip_ctlinput(cmd, sa, vip)
328 	int cmd;
329 	struct sockaddr *sa;
330 	void *vip;
331 {
332 	struct in_ifaddr *ia;
333 	struct ifnet *ifp;
334 	int err;
335 	int flags;
336 
337 	switch(cmd) {
338 	case PRC_IFDOWN:
339 		for (ia = in_ifaddrhead.tqh_first; ia;
340 		     ia = ia->ia_link.tqe_next) {
341 			if (ia->ia_ifa.ifa_addr == sa
342 			    && (ia->ia_flags & IFA_ROUTE)) {
343 				/*
344 				 * in_ifscrub kills the interface route.
345 				 */
346 				in_ifscrub(ia->ia_ifp, ia);
347 				/*
348 				 * in_ifadown gets rid of all the rest of
349 				 * the routes.  This is not quite the right
350 				 * thing to do, but at least if we are running
351 				 * a routing process they will come back.
352 				 */
353 				in_ifadown(&ia->ia_ifa);
354 				break;
355 			}
356 		}
357 		break;
358 
359 	case PRC_IFUP:
360 		for (ia = in_ifaddrhead.tqh_first; ia;
361 		     ia = ia->ia_link.tqe_next) {
362 			if (ia->ia_ifa.ifa_addr == sa)
363 				break;
364 		}
365 		if (ia == 0 || (ia->ia_flags & IFA_ROUTE))
366 			return;
367 		flags = RTF_UP;
368 		ifp = ia->ia_ifa.ifa_ifp;
369 
370 		if ((ifp->if_flags & IFF_LOOPBACK)
371 		    || (ifp->if_flags & IFF_POINTOPOINT))
372 			flags |= RTF_HOST;
373 
374 		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
375 		if (err == 0)
376 			ia->ia_flags |= IFA_ROUTE;
377 		break;
378 	}
379 }
380 
381 static u_long	rip_sendspace = RIPSNDQ;
382 static u_long	rip_recvspace = RIPRCVQ;
383 
384 SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace,
385 	   0, "");
386 SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace,
387 	   0, "");
388 
389 static int
390 rip_attach(struct socket *so, int proto)
391 {
392 	struct inpcb *inp;
393 	int error;
394 
395 	inp = sotoinpcb(so);
396 	if (inp)
397 		panic("rip_attach");
398 	if ((so->so_state & SS_PRIV) == 0)
399 		return EACCES;
400 
401 	if ((error = soreserve(so, rip_sendspace, rip_recvspace)) ||
402 	    (error = in_pcballoc(so, &ripcbinfo)))
403 		return error;
404 	inp = (struct inpcb *)so->so_pcb;
405 	inp->inp_ip.ip_p = proto;
406 	return 0;
407 }
408 
409 static int
410 rip_detach(struct socket *so)
411 {
412 	struct inpcb *inp;
413 
414 	inp = sotoinpcb(so);
415 	if (inp == 0)
416 		panic("rip_detach");
417 	if (so == ip_mrouter)
418 		ip_mrouter_done();
419 	ip_rsvp_force_done(so);
420 	if (so == ip_rsvpd)
421 		ip_rsvp_done();
422 	in_pcbdetach(inp);
423 	return 0;
424 }
425 
/*
 * Abort a raw IP socket: mark it disconnected, then detach its pcb.
 * Returns whatever rip_detach() returns.
 */
static int
rip_abort(struct socket *so)
{
	int error;

	soisdisconnected(so);
	error = rip_detach(so);
	return error;
}
432 
433 static int
434 rip_disconnect(struct socket *so)
435 {
436 	if ((so->so_state & SS_ISCONNECTED) == 0)
437 		return ENOTCONN;
438 	return rip_abort(so);
439 }
440 
441 static int
442 rip_bind(struct socket *so, struct mbuf *nam)
443 {
444 	struct inpcb *inp = sotoinpcb(so);
445 	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
446 
447 	if (nam->m_len != sizeof(*addr))
448 		return EINVAL;
449 
450 	if (TAILQ_EMPTY(&ifnet) || ((addr->sin_family != AF_INET) &&
451 				    (addr->sin_family != AF_IMPLINK)) ||
452 	    (addr->sin_addr.s_addr &&
453 	     ifa_ifwithaddr((struct sockaddr *)addr) == 0))
454 		return EADDRNOTAVAIL;
455 	inp->inp_laddr = addr->sin_addr;
456 	return 0;
457 }
458 
459 static int
460 rip_connect(struct socket *so, struct mbuf *nam)
461 {
462 	struct inpcb *inp = sotoinpcb(so);
463 	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
464 
465 	if (nam->m_len != sizeof(*addr))
466 		return EINVAL;
467 	if (TAILQ_EMPTY(&ifnet))
468 		return EADDRNOTAVAIL;
469 	if ((addr->sin_family != AF_INET) &&
470 	    (addr->sin_family != AF_IMPLINK))
471 		return EAFNOSUPPORT;
472 	inp->inp_faddr = addr->sin_addr;
473 	soisconnected(so);
474 	return 0;
475 }
476 
/*
 * Shut down the sending side of a raw IP socket by calling
 * socantsendmore() on it.  Always returns 0.
 */
static int
rip_shutdown(struct socket *so)
{

	socantsendmore(so);
	return 0;
}
483 
484 static int
485 rip_send(struct socket *so, int flags, struct mbuf *m, struct mbuf *nam,
486 	 struct mbuf *control)
487 {
488 	struct inpcb *inp = sotoinpcb(so);
489 	register u_long dst;
490 
491 	if (so->so_state & SS_ISCONNECTED) {
492 		if (nam) {
493 			m_freem(m);
494 			return EISCONN;
495 		}
496 		dst = inp->inp_faddr.s_addr;
497 	} else {
498 		if (nam == NULL) {
499 			m_freem(m);
500 			return ENOTCONN;
501 		}
502 		dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
503 	}
504 	return rip_output(m, so, dst);
505 }
506 
507 struct pr_usrreqs rip_usrreqs = {
508 	rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
509 	pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
510 	pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp,
511 	pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown,
512 	in_setsockaddr
513 };
514