xref: /freebsd/sys/netinet/raw_ip.c (revision 7660b554bc59a07be0431c17e0e33815818baa69)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_inet6.h"
38 #include "opt_ipsec.h"
39 #include "opt_mac.h"
40 #include "opt_random_ip_id.h"
41 
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/mac.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/proc.h>
49 #include <sys/protosw.h>
50 #include <sys/signalvar.h>
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/sx.h>
54 #include <sys/sysctl.h>
55 #include <sys/systm.h>
56 
57 #include <vm/uma.h>
58 
59 #include <net/if.h>
60 #include <net/route.h>
61 
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/in_var.h>
66 #include <netinet/ip.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/ip_mroute.h>
69 
70 #include <netinet/ip_fw.h>
71 #include <netinet/ip_dummynet.h>
72 
73 #ifdef FAST_IPSEC
74 #include <netipsec/ipsec.h>
75 #endif /*FAST_IPSEC*/
76 
77 #ifdef IPSEC
78 #include <netinet6/ipsec.h>
79 #endif /*IPSEC*/
80 
81 struct	inpcbhead ripcb;
82 struct	inpcbinfo ripcbinfo;
83 
84 /* control hooks for ipfw and dummynet */
85 ip_fw_ctl_t *ip_fw_ctl_ptr;
86 ip_dn_ctl_t *ip_dn_ctl_ptr;
87 
88 /*
89  * hooks for multicast routing. They all default to NULL,
90  * so leave them not initialized and rely on BSS being set to 0.
91  */
92 
93 /* The socket used to communicate with the multicast routing daemon.  */
94 struct socket  *ip_mrouter;
95 
96 /* The various mrouter and rsvp functions */
97 int (*ip_mrouter_set)(struct socket *, struct sockopt *);
98 int (*ip_mrouter_get)(struct socket *, struct sockopt *);
99 int (*ip_mrouter_done)(void);
100 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
101                    struct ip_moptions *);
102 int (*mrt_ioctl)(int, caddr_t);
103 int (*legal_vif_num)(int);
104 u_long (*ip_mcast_src)(int);
105 
106 void (*rsvp_input_p)(struct mbuf *m, int off);
107 int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
108 void (*ip_rsvp_force_done)(struct socket *);
109 
/*
 * Nominal send and receive space for a raw IP socket; these are the
 * initial values of rip_sendspace and rip_recvspace.
 */
#define	RIPSNDQ		(8 * 1024)
#define	RIPRCVQ		(8 * 1024)
115 
116 /*
117  * Raw interface to IP protocol.
118  */
119 
120 /*
121  * Initialize raw connection block q.
122  */
123 void
124 rip_init()
125 {
126 	INP_INFO_LOCK_INIT(&ripcbinfo, "rip");
127 	LIST_INIT(&ripcb);
128 	ripcbinfo.listhead = &ripcb;
129 	/*
130 	 * XXX We don't use the hash list for raw IP, but it's easier
131 	 * to allocate a one entry hash list than it is to check all
132 	 * over the place for hashbase == NULL.
133 	 */
134 	ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
135 	ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
136 	ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb),
137 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
138 	uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
139 }
140 
141 static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
142 
143 static int
144 raw_append(struct inpcb *last, struct ip *ip, struct mbuf *n)
145 {
146 	int policyfail = 0;
147 
148 #ifdef IPSEC
149 	/* check AH/ESP integrity. */
150 	if (ipsec4_in_reject_so(n, last->inp_socket)) {
151 		policyfail = 1;
152 		ipsecstat.in_polvio++;
153 		/* do not inject data to pcb */
154 	}
155 #endif /*IPSEC*/
156 #ifdef FAST_IPSEC
157 	/* check AH/ESP integrity. */
158 	if (ipsec4_in_reject(n, last)) {
159 		policyfail = 1;
160 		/* do not inject data to pcb */
161 	}
162 #endif /*FAST_IPSEC*/
163 #ifdef MAC
164 	if (!policyfail && mac_check_socket_deliver(last->inp_socket, n) != 0)
165 		policyfail = 1;
166 #endif
167 	if (!policyfail) {
168 		struct mbuf *opts = NULL;
169 
170 		if ((last->inp_flags & INP_CONTROLOPTS) ||
171 		    (last->inp_socket->so_options & SO_TIMESTAMP))
172 			ip_savecontrol(last, &opts, ip, n);
173 		if (sbappendaddr(&last->inp_socket->so_rcv,
174 		    (struct sockaddr *)&ripsrc, n, opts) == 0) {
175 			/* should notify about lost packet */
176 			m_freem(n);
177 			if (opts)
178 				m_freem(opts);
179 		} else
180 			sorwakeup(last->inp_socket);
181 	} else
182 		m_freem(n);
183 	return policyfail;
184 }
185 
186 /*
187  * Setup generic address and protocol structures
188  * for raw_input routine, then pass them along with
189  * mbuf chain.
190  */
191 void
192 rip_input(struct mbuf *m, int off)
193 {
194 	struct ip *ip = mtod(m, struct ip *);
195 	int proto = ip->ip_p;
196 	struct inpcb *inp, *last;
197 
198 	INP_INFO_RLOCK(&ripcbinfo);
199 	ripsrc.sin_addr = ip->ip_src;
200 	last = NULL;
201 	LIST_FOREACH(inp, &ripcb, inp_list) {
202 		INP_LOCK(inp);
203 		if (inp->inp_ip_p && inp->inp_ip_p != proto) {
204 	docontinue:
205 			INP_UNLOCK(inp);
206 			continue;
207 		}
208 #ifdef INET6
209 		if ((inp->inp_vflag & INP_IPV4) == 0)
210 			goto docontinue;
211 #endif
212 		if (inp->inp_laddr.s_addr &&
213                     inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
214 			goto docontinue;
215 		if (inp->inp_faddr.s_addr &&
216                     inp->inp_faddr.s_addr != ip->ip_src.s_addr)
217 			goto docontinue;
218 		if (last) {
219 			struct mbuf *n;
220 
221 			n = m_copy(m, 0, (int)M_COPYALL);
222 			if (n != NULL)
223 				(void) raw_append(last, ip, n);
224 			/* XXX count dropped packet */
225 			INP_UNLOCK(last);
226 		}
227 		last = inp;
228 	}
229 	if (last != NULL) {
230 		if (raw_append(last, ip, m) != 0)
231 			ipstat.ips_delivered--;
232 		INP_UNLOCK(last);
233 	} else {
234 		m_freem(m);
235 		ipstat.ips_noproto++;
236 		ipstat.ips_delivered--;
237 	}
238 	INP_INFO_RUNLOCK(&ripcbinfo);
239 }
240 
241 /*
242  * Generate IP header and pass packet to ip_output.
243  * Tack on options user may have setup with control call.
244  */
245 int
246 rip_output(struct mbuf *m, struct socket *so, u_long dst)
247 {
248 	struct ip *ip;
249 	struct inpcb *inp = sotoinpcb(so);
250 	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
251 
252 #ifdef MAC
253 	mac_create_mbuf_from_socket(so, m);
254 #endif
255 
256 	/*
257 	 * If the user handed us a complete IP packet, use it.
258 	 * Otherwise, allocate an mbuf for a header and fill it in.
259 	 */
260 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
261 		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
262 			m_freem(m);
263 			return(EMSGSIZE);
264 		}
265 		M_PREPEND(m, sizeof(struct ip), M_TRYWAIT);
266 		if (m == NULL)
267 			return(ENOBUFS);
268 		ip = mtod(m, struct ip *);
269 		ip->ip_tos = inp->inp_ip_tos;
270 		ip->ip_off = 0;
271 		ip->ip_p = inp->inp_ip_p;
272 		ip->ip_len = m->m_pkthdr.len;
273 		ip->ip_src = inp->inp_laddr;
274 		ip->ip_dst.s_addr = dst;
275 		ip->ip_ttl = inp->inp_ip_ttl;
276 	} else {
277 		if (m->m_pkthdr.len > IP_MAXPACKET) {
278 			m_freem(m);
279 			return(EMSGSIZE);
280 		}
281 		ip = mtod(m, struct ip *);
282 		/* don't allow both user specified and setsockopt options,
283 		   and don't allow packet length sizes that will crash */
284 		if (((ip->ip_hl != (sizeof (*ip) >> 2))
285 		     && inp->inp_options)
286 		    || (ip->ip_len > m->m_pkthdr.len)
287 		    || (ip->ip_len < (ip->ip_hl << 2))) {
288 			m_freem(m);
289 			return EINVAL;
290 		}
291 		if (ip->ip_id == 0)
292 #ifdef RANDOM_IP_ID
293 			ip->ip_id = ip_randomid();
294 #else
295 			ip->ip_id = htons(ip_id++);
296 #endif
297 		/* XXX prevent ip_output from overwriting header fields */
298 		flags |= IP_RAWOUTPUT;
299 		ipstat.ips_rawout++;
300 	}
301 
302 	if (inp->inp_flags & INP_ONESBCAST)
303 		flags |= IP_SENDONES;
304 
305 	return (ip_output(m, inp->inp_options, &inp->inp_route, flags,
306 			  inp->inp_moptions, inp));
307 }
308 
309 /*
310  * Raw IP socket option processing.
311  *
312  * Note that access to all of the IP administrative functions here is
313  * implicitly protected by suser() as gaining access to a raw socket
314  * requires either that the thread pass a suser() check, or that it be
315  * passed a raw socket by another thread that has passed a suser() check.
316  * If FreeBSD moves to a more fine-grained access control mechanism,
317  * additional checks will need to be placed here if the raw IP attachment
318  * check is not equivilent the the check required for these
319  * administrative operations; in some cases, these checks are already
320  * present.
321  */
322 int
323 rip_ctloutput(struct socket *so, struct sockopt *sopt)
324 {
325 	struct	inpcb *inp = sotoinpcb(so);
326 	int	error, optval;
327 
328 	if (sopt->sopt_level != IPPROTO_IP)
329 		return (EINVAL);
330 
331 	error = 0;
332 
333 	switch (sopt->sopt_dir) {
334 	case SOPT_GET:
335 		switch (sopt->sopt_name) {
336 		case IP_HDRINCL:
337 			optval = inp->inp_flags & INP_HDRINCL;
338 			error = sooptcopyout(sopt, &optval, sizeof optval);
339 			break;
340 
341 		case IP_FW_ADD:	/* ADD actually returns the body... */
342 		case IP_FW_GET:
343 			if (IPFW_LOADED)
344 				error = ip_fw_ctl_ptr(sopt);
345 			else
346 				error = ENOPROTOOPT;
347 			break;
348 
349 		case IP_DUMMYNET_GET:
350 			if (DUMMYNET_LOADED)
351 				error = ip_dn_ctl_ptr(sopt);
352 			else
353 				error = ENOPROTOOPT;
354 			break ;
355 
356 		case MRT_INIT:
357 		case MRT_DONE:
358 		case MRT_ADD_VIF:
359 		case MRT_DEL_VIF:
360 		case MRT_ADD_MFC:
361 		case MRT_DEL_MFC:
362 		case MRT_VERSION:
363 		case MRT_ASSERT:
364 		case MRT_API_SUPPORT:
365 		case MRT_API_CONFIG:
366 		case MRT_ADD_BW_UPCALL:
367 		case MRT_DEL_BW_UPCALL:
368 			error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
369 				EOPNOTSUPP;
370 			break;
371 
372 		default:
373 			error = ip_ctloutput(so, sopt);
374 			break;
375 		}
376 		break;
377 
378 	case SOPT_SET:
379 		switch (sopt->sopt_name) {
380 		case IP_HDRINCL:
381 			error = sooptcopyin(sopt, &optval, sizeof optval,
382 					    sizeof optval);
383 			if (error)
384 				break;
385 			if (optval)
386 				inp->inp_flags |= INP_HDRINCL;
387 			else
388 				inp->inp_flags &= ~INP_HDRINCL;
389 			break;
390 
391 		case IP_FW_ADD:
392 		case IP_FW_DEL:
393 		case IP_FW_FLUSH:
394 		case IP_FW_ZERO:
395 		case IP_FW_RESETLOG:
396 			if (IPFW_LOADED)
397 				error = ip_fw_ctl_ptr(sopt);
398 			else
399 				error = ENOPROTOOPT;
400 			break;
401 
402 		case IP_DUMMYNET_CONFIGURE:
403 		case IP_DUMMYNET_DEL:
404 		case IP_DUMMYNET_FLUSH:
405 			if (DUMMYNET_LOADED)
406 				error = ip_dn_ctl_ptr(sopt);
407 			else
408 				error = ENOPROTOOPT ;
409 			break ;
410 
411 		case IP_RSVP_ON:
412 			error = ip_rsvp_init(so);
413 			break;
414 
415 		case IP_RSVP_OFF:
416 			error = ip_rsvp_done();
417 			break;
418 
419 		case IP_RSVP_VIF_ON:
420 		case IP_RSVP_VIF_OFF:
421 			error = ip_rsvp_vif ?
422 				ip_rsvp_vif(so, sopt) : EINVAL;
423 			break;
424 
425 		case MRT_INIT:
426 		case MRT_DONE:
427 		case MRT_ADD_VIF:
428 		case MRT_DEL_VIF:
429 		case MRT_ADD_MFC:
430 		case MRT_DEL_MFC:
431 		case MRT_VERSION:
432 		case MRT_ASSERT:
433 		case MRT_API_SUPPORT:
434 		case MRT_API_CONFIG:
435 		case MRT_ADD_BW_UPCALL:
436 		case MRT_DEL_BW_UPCALL:
437 			error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
438 					EOPNOTSUPP;
439 			break;
440 
441 		default:
442 			error = ip_ctloutput(so, sopt);
443 			break;
444 		}
445 		break;
446 	}
447 
448 	return (error);
449 }
450 
451 /*
452  * This function exists solely to receive the PRC_IFDOWN messages which
453  * are sent by if_down().  It looks for an ifaddr whose ifa_addr is sa,
454  * and calls in_ifadown() to remove all routes corresponding to that address.
455  * It also receives the PRC_IFUP messages from if_up() and reinstalls the
456  * interface routes.
457  */
458 void
459 rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
460 {
461 	struct in_ifaddr *ia;
462 	struct ifnet *ifp;
463 	int err;
464 	int flags;
465 
466 	switch (cmd) {
467 	case PRC_IFDOWN:
468 		TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
469 			if (ia->ia_ifa.ifa_addr == sa
470 			    && (ia->ia_flags & IFA_ROUTE)) {
471 				/*
472 				 * in_ifscrub kills the interface route.
473 				 */
474 				in_ifscrub(ia->ia_ifp, ia);
475 				/*
476 				 * in_ifadown gets rid of all the rest of
477 				 * the routes.  This is not quite the right
478 				 * thing to do, but at least if we are running
479 				 * a routing process they will come back.
480 				 */
481 				in_ifadown(&ia->ia_ifa, 0);
482 				break;
483 			}
484 		}
485 		break;
486 
487 	case PRC_IFUP:
488 		TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
489 			if (ia->ia_ifa.ifa_addr == sa)
490 				break;
491 		}
492 		if (ia == 0 || (ia->ia_flags & IFA_ROUTE))
493 			return;
494 		flags = RTF_UP;
495 		ifp = ia->ia_ifa.ifa_ifp;
496 
497 		if ((ifp->if_flags & IFF_LOOPBACK)
498 		    || (ifp->if_flags & IFF_POINTOPOINT))
499 			flags |= RTF_HOST;
500 
501 		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
502 		if (err == 0)
503 			ia->ia_flags |= IFA_ROUTE;
504 		break;
505 	}
506 }
507 
508 u_long	rip_sendspace = RIPSNDQ;
509 u_long	rip_recvspace = RIPRCVQ;
510 
511 SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
512     &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
513 SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
514     &rip_recvspace, 0, "Maximum incoming raw IP datagram size");
515 
516 static int
517 rip_attach(struct socket *so, int proto, struct thread *td)
518 {
519 	struct inpcb *inp;
520 	int error;
521 
522 	/* XXX why not lower? */
523 	INP_INFO_WLOCK(&ripcbinfo);
524 	inp = sotoinpcb(so);
525 	if (inp) {
526 		/* XXX counter, printf */
527 		INP_INFO_WUNLOCK(&ripcbinfo);
528 		return EINVAL;
529 	}
530 	if (td && (error = suser(td)) != 0) {
531 		INP_INFO_WUNLOCK(&ripcbinfo);
532 		return error;
533 	}
534 	if (proto >= IPPROTO_MAX || proto < 0) {
535 		INP_INFO_WUNLOCK(&ripcbinfo);
536 		return EPROTONOSUPPORT;
537 	}
538 
539 	error = soreserve(so, rip_sendspace, rip_recvspace);
540 	if (error) {
541 		INP_INFO_WUNLOCK(&ripcbinfo);
542 		return error;
543 	}
544 	error = in_pcballoc(so, &ripcbinfo, td);
545 	if (error) {
546 		INP_INFO_WUNLOCK(&ripcbinfo);
547 		return error;
548 	}
549 	inp = (struct inpcb *)so->so_pcb;
550 	INP_LOCK(inp);
551 	INP_INFO_WUNLOCK(&ripcbinfo);
552 	inp->inp_vflag |= INP_IPV4;
553 	inp->inp_ip_p = proto;
554 	inp->inp_ip_ttl = ip_defttl;
555 	INP_UNLOCK(inp);
556 	return 0;
557 }
558 
559 static int
560 rip_detach(struct socket *so)
561 {
562 	struct inpcb *inp;
563 
564 	INP_INFO_WLOCK(&ripcbinfo);
565 	inp = sotoinpcb(so);
566 	if (inp == 0) {
567 		/* XXX counter, printf */
568 		INP_INFO_WUNLOCK(&ripcbinfo);
569 		return EINVAL;
570 	}
571 	INP_LOCK(inp);
572 	if (so == ip_mrouter && ip_mrouter_done)
573 		ip_mrouter_done();
574 	if (ip_rsvp_force_done)
575 		ip_rsvp_force_done(so);
576 	if (so == ip_rsvpd)
577 		ip_rsvp_done();
578 	in_pcbdetach(inp);
579 	INP_INFO_WUNLOCK(&ripcbinfo);
580 	return 0;
581 }
582 
583 static int
584 rip_abort(struct socket *so)
585 {
586 	soisdisconnected(so);
587 	if (so->so_state & SS_NOFDREF)
588 		return rip_detach(so);
589 	return 0;
590 }
591 
592 static int
593 rip_disconnect(struct socket *so)
594 {
595 	if ((so->so_state & SS_ISCONNECTED) == 0)
596 		return ENOTCONN;
597 	return rip_abort(so);
598 }
599 
600 static int
601 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
602 {
603 	struct inpcb *inp = sotoinpcb(so);
604 	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
605 
606 	if (nam->sa_len != sizeof(*addr))
607 		return EINVAL;
608 
609 	if (TAILQ_EMPTY(&ifnet) || ((addr->sin_family != AF_INET) &&
610 				    (addr->sin_family != AF_IMPLINK)) ||
611 	    (addr->sin_addr.s_addr &&
612 	     ifa_ifwithaddr((struct sockaddr *)addr) == 0))
613 		return EADDRNOTAVAIL;
614 	inp->inp_laddr = addr->sin_addr;
615 	return 0;
616 }
617 
618 static int
619 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
620 {
621 	struct inpcb *inp = sotoinpcb(so);
622 	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
623 
624 	if (nam->sa_len != sizeof(*addr))
625 		return EINVAL;
626 	if (TAILQ_EMPTY(&ifnet))
627 		return EADDRNOTAVAIL;
628 	if ((addr->sin_family != AF_INET) &&
629 	    (addr->sin_family != AF_IMPLINK))
630 		return EAFNOSUPPORT;
631 	inp->inp_faddr = addr->sin_addr;
632 	soisconnected(so);
633 	return 0;
634 }
635 
/*
 * Shut down the sending side of a raw IP socket.  Always returns 0.
 */
static int
rip_shutdown(struct socket *so)
{

	socantsendmore(so);
	return (0);
}
642 
643 static int
644 rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
645 	 struct mbuf *control, struct thread *td)
646 {
647 	struct inpcb *inp = sotoinpcb(so);
648 	register u_long dst;
649 
650 	if (so->so_state & SS_ISCONNECTED) {
651 		if (nam) {
652 			m_freem(m);
653 			return EISCONN;
654 		}
655 		dst = inp->inp_faddr.s_addr;
656 	} else {
657 		if (nam == NULL) {
658 			m_freem(m);
659 			return ENOTCONN;
660 		}
661 		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
662 	}
663 	return rip_output(m, so, dst);
664 }
665 
666 static int
667 rip_pcblist(SYSCTL_HANDLER_ARGS)
668 {
669 	int error, i, n;
670 	struct inpcb *inp, **inp_list;
671 	inp_gen_t gencnt;
672 	struct xinpgen xig;
673 
674 	/*
675 	 * The process of preparing the TCB list is too time-consuming and
676 	 * resource-intensive to repeat twice on every request.
677 	 */
678 	if (req->oldptr == 0) {
679 		n = ripcbinfo.ipi_count;
680 		req->oldidx = 2 * (sizeof xig)
681 			+ (n + n/8) * sizeof(struct xinpcb);
682 		return 0;
683 	}
684 
685 	if (req->newptr != 0)
686 		return EPERM;
687 
688 	/*
689 	 * OK, now we're committed to doing something.
690 	 */
691 	INP_INFO_RLOCK(&ripcbinfo);
692 	gencnt = ripcbinfo.ipi_gencnt;
693 	n = ripcbinfo.ipi_count;
694 	INP_INFO_RUNLOCK(&ripcbinfo);
695 
696 	xig.xig_len = sizeof xig;
697 	xig.xig_count = n;
698 	xig.xig_gen = gencnt;
699 	xig.xig_sogen = so_gencnt;
700 	error = SYSCTL_OUT(req, &xig, sizeof xig);
701 	if (error)
702 		return error;
703 
704 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
705 	if (inp_list == 0)
706 		return ENOMEM;
707 
708 	INP_INFO_RLOCK(&ripcbinfo);
709 	for (inp = LIST_FIRST(ripcbinfo.listhead), i = 0; inp && i < n;
710 	     inp = LIST_NEXT(inp, inp_list)) {
711 		INP_LOCK(inp);
712 		if (inp->inp_gencnt <= gencnt &&
713 		    cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0) {
714 			/* XXX held references? */
715 			inp_list[i++] = inp;
716 		}
717 		INP_UNLOCK(inp);
718 	}
719 	INP_INFO_RUNLOCK(&ripcbinfo);
720 	n = i;
721 
722 	error = 0;
723 	for (i = 0; i < n; i++) {
724 		inp = inp_list[i];
725 		if (inp->inp_gencnt <= gencnt) {
726 			struct xinpcb xi;
727 			xi.xi_len = sizeof xi;
728 			/* XXX should avoid extra copy */
729 			bcopy(inp, &xi.xi_inp, sizeof *inp);
730 			if (inp->inp_socket)
731 				sotoxsocket(inp->inp_socket, &xi.xi_socket);
732 			error = SYSCTL_OUT(req, &xi, sizeof xi);
733 		}
734 	}
735 	if (!error) {
736 		/*
737 		 * Give the user an updated idea of our state.
738 		 * If the generation differs from what we told
739 		 * her before, she knows that something happened
740 		 * while we were processing this request, and it
741 		 * might be necessary to retry.
742 		 */
743 		INP_INFO_RLOCK(&ripcbinfo);
744 		xig.xig_gen = ripcbinfo.ipi_gencnt;
745 		xig.xig_sogen = so_gencnt;
746 		xig.xig_count = ripcbinfo.ipi_count;
747 		INP_INFO_RUNLOCK(&ripcbinfo);
748 		error = SYSCTL_OUT(req, &xig, sizeof xig);
749 	}
750 	free(inp_list, M_TEMP);
751 	return error;
752 }
753 
754 /*
755  * This is the wrapper function for in_setsockaddr.  We just pass down
756  * the pcbinfo for in_setpeeraddr to lock.
757  */
758 static int
759 rip_sockaddr(struct socket *so, struct sockaddr **nam)
760 {
761 	return (in_setsockaddr(so, nam, &ripcbinfo));
762 }
763 
764 /*
765  * This is the wrapper function for in_setpeeraddr.  We just pass down
766  * the pcbinfo for in_setpeeraddr to lock.
767  */
768 static int
769 rip_peeraddr(struct socket *so, struct sockaddr **nam)
770 {
771 	return (in_setpeeraddr(so, nam, &ripcbinfo));
772 }
773 
774 
775 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0,
776 	    rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
777 
778 struct pr_usrreqs rip_usrreqs = {
779 	rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
780 	pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
781 	pru_listen_notsupp, rip_peeraddr, pru_rcvd_notsupp,
782 	pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown,
783 	rip_sockaddr, sosend, soreceive, sopoll
784 };
785