1c398230bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 30ae76120SRobert Watson * The Regents of the University of California. 40ae76120SRobert Watson * All rights reserved. 5df8bae1dSRodney W. Grimes * 6df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 7df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 8df8bae1dSRodney W. Grimes * are met: 9df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 10df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 11df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 12df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 13df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 14df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 15df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 16df8bae1dSRodney W. Grimes * without specific prior written permission. 17df8bae1dSRodney W. Grimes * 18df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28df8bae1dSRodney W. Grimes * SUCH DAMAGE. 29df8bae1dSRodney W. Grimes * 3025f26ad8SGarrett Wollman * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 31df8bae1dSRodney W. Grimes */ 32df8bae1dSRodney W. Grimes 334b421e2dSMike Silbersack #include <sys/cdefs.h> 344b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 354b421e2dSMike Silbersack 366a800098SYoshinobu Inoue #include "opt_inet6.h" 376a800098SYoshinobu Inoue #include "opt_ipsec.h" 3833553d6eSBjoern A. Zeeb #include "opt_route.h" 394ea889c6SRobert Watson #include "opt_mac.h" 406a800098SYoshinobu Inoue 41df8bae1dSRodney W. Grimes #include <sys/param.h> 425a59cefcSBosko Milekic #include <sys/jail.h> 43117bcae7SGarrett Wollman #include <sys/kernel.h> 44960ed29cSSeigo Tanimura #include <sys/lock.h> 45df8bae1dSRodney W. Grimes #include <sys/malloc.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47acd3428bSRobert Watson #include <sys/priv.h> 484787fd37SPaul Saab #include <sys/proc.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 50385195c0SMarko Zec #include <sys/rwlock.h> 51960ed29cSSeigo Tanimura #include <sys/signalvar.h> 52117bcae7SGarrett Wollman #include <sys/socket.h> 53df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 54960ed29cSSeigo Tanimura #include <sys/sx.h> 55117bcae7SGarrett Wollman #include <sys/sysctl.h> 56960ed29cSSeigo Tanimura #include <sys/systm.h> 57603724d3SBjoern A. Zeeb #include <sys/vimage.h> 588781d8e9SBruce Evans 5969c2d429SJeff Roberson #include <vm/uma.h> 60df8bae1dSRodney W. Grimes 61df8bae1dSRodney W. Grimes #include <net/if.h> 62df8bae1dSRodney W. Grimes #include <net/route.h> 634b79449eSBjoern A. Zeeb #include <net/vnet.h> 64df8bae1dSRodney W. Grimes 65df8bae1dSRodney W. Grimes #include <netinet/in.h> 66df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 67c1f8a6ceSDavid Greenman #include <netinet/in_pcb.h> 68c1f8a6ceSDavid Greenman #include <netinet/in_var.h> 69960ed29cSSeigo Tanimura #include <netinet/ip.h> 70df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 71df8bae1dSRodney W. Grimes #include <netinet/ip_mroute.h> 72df8bae1dSRodney W. Grimes 73100ba1a6SJordan K. Hubbard #include <netinet/ip_fw.h> 74db69a05dSPaul Saab #include <netinet/ip_dummynet.h> 754b79449eSBjoern A. Zeeb #include <netinet/vinet.h> 76100ba1a6SJordan K. Hubbard 77b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 78b9234fafSSam Leffler #include <netipsec/ipsec.h> 79b2630c29SGeorge V. Neville-Neil #endif /*IPSEC*/ 80b9234fafSSam Leffler 81aed55708SRobert Watson #include <security/mac/mac_framework.h> 82aed55708SRobert Watson 8344e33a07SMarko Zec #ifdef VIMAGE_GLOBALS 8482cd038dSYoshinobu Inoue struct inpcbhead ripcb; 8582cd038dSYoshinobu Inoue struct inpcbinfo ripcbinfo; 8644e33a07SMarko Zec #endif 87df8bae1dSRodney W. Grimes 88db69a05dSPaul Saab /* control hooks for ipfw and dummynet */ 899b932e9eSAndre Oppermann ip_fw_ctl_t *ip_fw_ctl_ptr = NULL; 909b932e9eSAndre Oppermann ip_dn_ctl_t *ip_dn_ctl_ptr = NULL; 91db69a05dSPaul Saab 92df8bae1dSRodney W. Grimes /* 930ae76120SRobert Watson * Hooks for multicast routing. They all default to NULL, so leave them not 940ae76120SRobert Watson * initialized and rely on BSS being set to 0. 95bbb4330bSLuigi Rizzo */ 96bbb4330bSLuigi Rizzo 970ae76120SRobert Watson /* 980ae76120SRobert Watson * The socket used to communicate with the multicast routing daemon. 990ae76120SRobert Watson */ 10044e33a07SMarko Zec #ifdef VIMAGE_GLOBALS 101bbb4330bSLuigi Rizzo struct socket *ip_mrouter; 10244e33a07SMarko Zec #endif 103bbb4330bSLuigi Rizzo 1040ae76120SRobert Watson /* 1050ae76120SRobert Watson * The various mrouter and rsvp functions. 1060ae76120SRobert Watson */ 107bbb4330bSLuigi Rizzo int (*ip_mrouter_set)(struct socket *, struct sockopt *); 108bbb4330bSLuigi Rizzo int (*ip_mrouter_get)(struct socket *, struct sockopt *); 109bbb4330bSLuigi Rizzo int (*ip_mrouter_done)(void); 110bbb4330bSLuigi Rizzo int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 111bbb4330bSLuigi Rizzo struct ip_moptions *); 1128b07e49aSJulian Elischer int (*mrt_ioctl)(int, caddr_t, int); 113bbb4330bSLuigi Rizzo int (*legal_vif_num)(int); 114bbb4330bSLuigi Rizzo u_long (*ip_mcast_src)(int); 115bbb4330bSLuigi Rizzo 116bbb4330bSLuigi Rizzo void (*rsvp_input_p)(struct mbuf *m, int off); 117bbb4330bSLuigi Rizzo int (*ip_rsvp_vif)(struct socket *, struct sockopt *); 118bbb4330bSLuigi Rizzo void (*ip_rsvp_force_done)(struct socket *); 119bbb4330bSLuigi Rizzo 120bbb4330bSLuigi Rizzo /* 1219ed324c9SAlexander Motin * Hash functions 1229ed324c9SAlexander Motin */ 1239ed324c9SAlexander Motin 1249ed324c9SAlexander Motin #define INP_PCBHASH_RAW_SIZE 256 1259ed324c9SAlexander Motin #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \ 1269ed324c9SAlexander Motin (((proto) + (laddr) + (faddr)) % (mask) + 1) 1279ed324c9SAlexander Motin 1289ed324c9SAlexander Motin static void 1299ed324c9SAlexander Motin rip_inshash(struct inpcb *inp) 1309ed324c9SAlexander Motin { 1319ed324c9SAlexander Motin struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1329ed324c9SAlexander Motin struct inpcbhead *pcbhash; 1339ed324c9SAlexander Motin int hash; 1349ed324c9SAlexander Motin 1359ed324c9SAlexander Motin INP_INFO_WLOCK_ASSERT(pcbinfo); 1369ed324c9SAlexander Motin INP_WLOCK_ASSERT(inp); 1379ed324c9SAlexander Motin 13818f401c6SAlexander Motin if (inp->inp_ip_p != 0 && 13918f401c6SAlexander Motin inp->inp_laddr.s_addr != INADDR_ANY && 14018f401c6SAlexander Motin inp->inp_faddr.s_addr != INADDR_ANY) { 1419ed324c9SAlexander Motin hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr, 1429ed324c9SAlexander Motin inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask); 14318f401c6SAlexander Motin } else 1449ed324c9SAlexander Motin hash = 0; 1459ed324c9SAlexander Motin pcbhash = &pcbinfo->ipi_hashbase[hash]; 1469ed324c9SAlexander Motin LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1479ed324c9SAlexander Motin } 1489ed324c9SAlexander Motin 1499ed324c9SAlexander Motin static void 1509ed324c9SAlexander Motin rip_delhash(struct inpcb *inp) 1519ed324c9SAlexander Motin { 15218f401c6SAlexander Motin 15318f401c6SAlexander Motin INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 1549ed324c9SAlexander Motin INP_WLOCK_ASSERT(inp); 15518f401c6SAlexander Motin 1569ed324c9SAlexander Motin LIST_REMOVE(inp, inp_hash); 1579ed324c9SAlexander Motin } 1589ed324c9SAlexander Motin 1599ed324c9SAlexander Motin /* 160df8bae1dSRodney W. Grimes * Raw interface to IP protocol. 161df8bae1dSRodney W. Grimes */ 162df8bae1dSRodney W. Grimes 163df8bae1dSRodney W. Grimes /* 164032dcc76SLuigi Rizzo * Initialize raw connection block q. 165df8bae1dSRodney W. Grimes */ 1664f590175SPaul Saab static void 1674f590175SPaul Saab rip_zone_change(void *tag) 1684f590175SPaul Saab { 1698b615593SMarko Zec INIT_VNET_INET(curvnet); 1704f590175SPaul Saab 171603724d3SBjoern A. Zeeb uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets); 1724f590175SPaul Saab } 1734f590175SPaul Saab 174d915b280SStephan Uphoff static int 175d915b280SStephan Uphoff rip_inpcb_init(void *mem, int size, int flags) 176d915b280SStephan Uphoff { 17708651e1fSJohn Baldwin struct inpcb *inp = mem; 17808651e1fSJohn Baldwin 179d915b280SStephan Uphoff INP_LOCK_INIT(inp, "inp", "rawinp"); 180d915b280SStephan Uphoff return (0); 181d915b280SStephan Uphoff } 182d915b280SStephan Uphoff 183df8bae1dSRodney W. Grimes void 184f2565d68SRobert Watson rip_init(void) 185df8bae1dSRodney W. Grimes { 1868b615593SMarko Zec INIT_VNET_INET(curvnet); 187f2565d68SRobert Watson 188603724d3SBjoern A. Zeeb INP_INFO_LOCK_INIT(&V_ripcbinfo, "rip"); 189603724d3SBjoern A. Zeeb LIST_INIT(&V_ripcb); 190603724d3SBjoern A. Zeeb V_ripcbinfo.ipi_listhead = &V_ripcb; 191ac957cd2SJulian Elischer V_ripcbinfo.ipi_hashbase = 192ac957cd2SJulian Elischer hashinit(INP_PCBHASH_RAW_SIZE, M_PCB, &V_ripcbinfo.ipi_hashmask); 193ac957cd2SJulian Elischer V_ripcbinfo.ipi_porthashbase = 194ac957cd2SJulian Elischer hashinit(1, M_PCB, &V_ripcbinfo.ipi_porthashmask); 195603724d3SBjoern A. Zeeb V_ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb), 196d915b280SStephan Uphoff NULL, NULL, rip_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 197603724d3SBjoern A. Zeeb uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets); 1980ae76120SRobert Watson EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, 1990ae76120SRobert Watson EVENTHANDLER_PRI_ANY); 200df8bae1dSRodney W. Grimes } 201df8bae1dSRodney W. Grimes 2023b6dd5a9SSam Leffler static int 2033b19fa35SRobert Watson rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, 2043b19fa35SRobert Watson struct sockaddr_in *ripsrc) 2053b6dd5a9SSam Leffler { 2064ea889c6SRobert Watson int policyfail = 0; 20733841545SHajimu UMEMOTO 2089ad11dd8SRobert Watson INP_RLOCK_ASSERT(last); 209cbe42d48SRobert Watson 210b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 211da0f4099SHajimu UMEMOTO /* check AH/ESP integrity. */ 212da0f4099SHajimu UMEMOTO if (ipsec4_in_reject(n, last)) { 213da0f4099SHajimu UMEMOTO policyfail = 1; 214b9234fafSSam Leffler } 215b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 2164ea889c6SRobert Watson #ifdef MAC 21730d239bcSRobert Watson if (!policyfail && mac_inpcb_check_deliver(last, n) != 0) 2184ea889c6SRobert Watson policyfail = 1; 2194ea889c6SRobert Watson #endif 220936cd18dSAndre Oppermann /* Check the minimum TTL for socket. */ 221936cd18dSAndre Oppermann if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl) 222936cd18dSAndre Oppermann policyfail = 1; 2233b6dd5a9SSam Leffler if (!policyfail) { 2243b6dd5a9SSam Leffler struct mbuf *opts = NULL; 2251e4d7da7SRobert Watson struct socket *so; 2263b6dd5a9SSam Leffler 2271e4d7da7SRobert Watson so = last->inp_socket; 2283b6dd5a9SSam Leffler if ((last->inp_flags & INP_CONTROLOPTS) || 2291fd7af26SAndre Oppermann (so->so_options & (SO_TIMESTAMP | SO_BINTIME))) 23082c23ebaSBill Fenner ip_savecontrol(last, &opts, ip, n); 2311e4d7da7SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 2321e4d7da7SRobert Watson if (sbappendaddr_locked(&so->so_rcv, 2333b19fa35SRobert Watson (struct sockaddr *)ripsrc, n, opts) == 0) { 234df8bae1dSRodney W. Grimes /* should notify about lost packet */ 235df8bae1dSRodney W. Grimes m_freem(n); 23682c23ebaSBill Fenner if (opts) 23782c23ebaSBill Fenner m_freem(opts); 2381e4d7da7SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 2394cc20ab1SSeigo Tanimura } else 2401e4d7da7SRobert Watson sorwakeup_locked(so); 2413b6dd5a9SSam Leffler } else 2423b6dd5a9SSam Leffler m_freem(n); 2430ae76120SRobert Watson return (policyfail); 244df8bae1dSRodney W. Grimes } 2453b6dd5a9SSam Leffler 2463b6dd5a9SSam Leffler /* 2470ae76120SRobert Watson * Setup generic address and protocol structures for raw_input routine, then 2480ae76120SRobert Watson * pass them along with mbuf chain. 2493b6dd5a9SSam Leffler */ 2503b6dd5a9SSam Leffler void 2513b6dd5a9SSam Leffler rip_input(struct mbuf *m, int off) 2523b6dd5a9SSam Leffler { 2538b615593SMarko Zec INIT_VNET_INET(curvnet); 254d10910e6SBruce M Simpson struct ifnet *ifp; 2553b6dd5a9SSam Leffler struct ip *ip = mtod(m, struct ip *); 2563b6dd5a9SSam Leffler int proto = ip->ip_p; 2573b6dd5a9SSam Leffler struct inpcb *inp, *last; 2583b19fa35SRobert Watson struct sockaddr_in ripsrc; 2599ed324c9SAlexander Motin int hash; 2603b6dd5a9SSam Leffler 2613b19fa35SRobert Watson bzero(&ripsrc, sizeof(ripsrc)); 2623b19fa35SRobert Watson ripsrc.sin_len = sizeof(ripsrc); 2633b19fa35SRobert Watson ripsrc.sin_family = AF_INET; 2643b6dd5a9SSam Leffler ripsrc.sin_addr = ip->ip_src; 2653b6dd5a9SSam Leffler last = NULL; 266d10910e6SBruce M Simpson 267d10910e6SBruce M Simpson ifp = m->m_pkthdr.rcvif; 268d10910e6SBruce M Simpson 2699ed324c9SAlexander Motin hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, 270603724d3SBjoern A. Zeeb ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); 271603724d3SBjoern A. Zeeb INP_INFO_RLOCK(&V_ripcbinfo); 272603724d3SBjoern A. Zeeb LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { 2730ca3b096SAlexander Motin if (inp->inp_ip_p != proto) 2740ca3b096SAlexander Motin continue; 2750ca3b096SAlexander Motin #ifdef INET6 27686d02c5cSBjoern A. Zeeb /* XXX inp locking */ 2770ca3b096SAlexander Motin if ((inp->inp_vflag & INP_IPV4) == 0) 2780ca3b096SAlexander Motin continue; 2790ca3b096SAlexander Motin #endif 2800ca3b096SAlexander Motin if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 2810ca3b096SAlexander Motin continue; 2820ca3b096SAlexander Motin if (inp->inp_faddr.s_addr != ip->ip_src.s_addr) 2830ca3b096SAlexander Motin continue; 284d10910e6SBruce M Simpson if (jailed(inp->inp_cred)) { 285d10910e6SBruce M Simpson /* 286d10910e6SBruce M Simpson * XXX: If faddr was bound to multicast group, 287d10910e6SBruce M Simpson * jailed raw socket will drop datagram. 288d10910e6SBruce M Simpson */ 289b89e82ddSJamie Gritton if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) 2909ed324c9SAlexander Motin continue; 291d10910e6SBruce M Simpson } 2923bb87a6cSKip Macy if (last != NULL) { 2939ed324c9SAlexander Motin struct mbuf *n; 2949ed324c9SAlexander Motin 2959ed324c9SAlexander Motin n = m_copy(m, 0, (int)M_COPYALL); 2969ed324c9SAlexander Motin if (n != NULL) 2979ed324c9SAlexander Motin (void) rip_append(last, ip, n, &ripsrc); 2989ed324c9SAlexander Motin /* XXX count dropped packet */ 2999ed324c9SAlexander Motin INP_RUNLOCK(last); 3009ed324c9SAlexander Motin } 30186d02c5cSBjoern A. Zeeb INP_RLOCK(inp); 3029ed324c9SAlexander Motin last = inp; 3039ed324c9SAlexander Motin } 304603724d3SBjoern A. Zeeb LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) { 3050ca3b096SAlexander Motin if (inp->inp_ip_p && inp->inp_ip_p != proto) 3063b6dd5a9SSam Leffler continue; 3073b6dd5a9SSam Leffler #ifdef INET6 30886d02c5cSBjoern A. Zeeb /* XXX inp locking */ 3093b6dd5a9SSam Leffler if ((inp->inp_vflag & INP_IPV4) == 0) 3100ca3b096SAlexander Motin continue; 3113b6dd5a9SSam Leffler #endif 312d10910e6SBruce M Simpson if (!in_nullhost(inp->inp_laddr) && 313d10910e6SBruce M Simpson !in_hosteq(inp->inp_laddr, ip->ip_dst)) 3140ca3b096SAlexander Motin continue; 315d10910e6SBruce M Simpson if (!in_nullhost(inp->inp_faddr) && 316d10910e6SBruce M Simpson !in_hosteq(inp->inp_faddr, ip->ip_src)) 3170ca3b096SAlexander Motin continue; 318d10910e6SBruce M Simpson if (jailed(inp->inp_cred)) { 319d10910e6SBruce M Simpson /* 320d10910e6SBruce M Simpson * Allow raw socket in jail to receive multicast; 321d10910e6SBruce M Simpson * assume process had PRIV_NETINET_RAW at attach, 322d10910e6SBruce M Simpson * and fall through into normal filter path if so. 323d10910e6SBruce M Simpson */ 324d10910e6SBruce M Simpson if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && 325d10910e6SBruce M Simpson prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) 3260ca3b096SAlexander Motin continue; 327d10910e6SBruce M Simpson } 328d10910e6SBruce M Simpson /* 329d10910e6SBruce M Simpson * If this raw socket has multicast state, and we 330d10910e6SBruce M Simpson * have received a multicast, check if this socket 331d10910e6SBruce M Simpson * should receive it, as multicast filtering is now 332d10910e6SBruce M Simpson * the responsibility of the transport layer. 333d10910e6SBruce M Simpson */ 334d10910e6SBruce M Simpson if (inp->inp_moptions != NULL && 335d10910e6SBruce M Simpson IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 336d10910e6SBruce M Simpson struct sockaddr_in group; 337d10910e6SBruce M Simpson int blocked; 338d10910e6SBruce M Simpson 339d10910e6SBruce M Simpson bzero(&group, sizeof(struct sockaddr_in)); 340d10910e6SBruce M Simpson group.sin_len = sizeof(struct sockaddr_in); 341d10910e6SBruce M Simpson group.sin_family = AF_INET; 342d10910e6SBruce M Simpson group.sin_addr = ip->ip_dst; 343d10910e6SBruce M Simpson 344d10910e6SBruce M Simpson blocked = imo_multi_filter(inp->inp_moptions, ifp, 345d10910e6SBruce M Simpson (struct sockaddr *)&group, 346d10910e6SBruce M Simpson (struct sockaddr *)&ripsrc); 347d10910e6SBruce M Simpson if (blocked != MCAST_PASS) { 348d10910e6SBruce M Simpson V_ipstat.ips_notmember++; 349d10910e6SBruce M Simpson continue; 350d10910e6SBruce M Simpson } 351d10910e6SBruce M Simpson } 3523bb87a6cSKip Macy if (last != NULL) { 3533b6dd5a9SSam Leffler struct mbuf *n; 3543b6dd5a9SSam Leffler 3553b6dd5a9SSam Leffler n = m_copy(m, 0, (int)M_COPYALL); 3563b6dd5a9SSam Leffler if (n != NULL) 3573b19fa35SRobert Watson (void) rip_append(last, ip, n, &ripsrc); 3583b6dd5a9SSam Leffler /* XXX count dropped packet */ 3599ad11dd8SRobert Watson INP_RUNLOCK(last); 360df8bae1dSRodney W. Grimes } 36186d02c5cSBjoern A. Zeeb INP_RLOCK(inp); 36282c23ebaSBill Fenner last = inp; 363df8bae1dSRodney W. Grimes } 364603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_ripcbinfo); 3653b6dd5a9SSam Leffler if (last != NULL) { 3663b19fa35SRobert Watson if (rip_append(last, ip, m, &ripsrc) != 0) 367603724d3SBjoern A. Zeeb V_ipstat.ips_delivered--; 3689ad11dd8SRobert Watson INP_RUNLOCK(last); 369df8bae1dSRodney W. Grimes } else { 370df8bae1dSRodney W. Grimes m_freem(m); 371603724d3SBjoern A. Zeeb V_ipstat.ips_noproto++; 372603724d3SBjoern A. Zeeb V_ipstat.ips_delivered--; 373df8bae1dSRodney W. Grimes } 374df8bae1dSRodney W. Grimes } 375df8bae1dSRodney W. Grimes 376df8bae1dSRodney W. Grimes /* 3770ae76120SRobert Watson * Generate IP header and pass packet to ip_output. Tack on options user may 3780ae76120SRobert Watson * have setup with control call. 379df8bae1dSRodney W. Grimes */ 380df8bae1dSRodney W. Grimes int 3813b6dd5a9SSam Leffler rip_output(struct mbuf *m, struct socket *so, u_long dst) 382df8bae1dSRodney W. Grimes { 3838b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 3843b6dd5a9SSam Leffler struct ip *ip; 385ac830b58SBosko Milekic int error; 3863b6dd5a9SSam Leffler struct inpcb *inp = sotoinpcb(so); 387b5d47ff5SJohn-Mark Gurney int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) | 388b5d47ff5SJohn-Mark Gurney IP_ALLOWBROADCAST; 389df8bae1dSRodney W. Grimes 390df8bae1dSRodney W. Grimes /* 3910ae76120SRobert Watson * If the user handed us a complete IP packet, use it. Otherwise, 3920ae76120SRobert Watson * allocate an mbuf for a header and fill it in. 393df8bae1dSRodney W. Grimes */ 394df8bae1dSRodney W. Grimes if ((inp->inp_flags & INP_HDRINCL) == 0) { 395430d30d8SBill Fenner if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { 396430d30d8SBill Fenner m_freem(m); 397430d30d8SBill Fenner return(EMSGSIZE); 398430d30d8SBill Fenner } 3992d01d331SRobert Watson M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 4006b48911bSRobert Watson if (m == NULL) 4016b48911bSRobert Watson return(ENOBUFS); 402ac830b58SBosko Milekic 4039ad11dd8SRobert Watson INP_RLOCK(inp); 404df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 4058ce3f3ddSRuslan Ermilov ip->ip_tos = inp->inp_ip_tos; 406b2828ad2SAndre Oppermann if (inp->inp_flags & INP_DONTFRAG) 407b2828ad2SAndre Oppermann ip->ip_off = IP_DF; 408b2828ad2SAndre Oppermann else 409df8bae1dSRodney W. Grimes ip->ip_off = 0; 410ca98b82cSDavid Greenman ip->ip_p = inp->inp_ip_p; 411df8bae1dSRodney W. Grimes ip->ip_len = m->m_pkthdr.len; 412b89e82ddSJamie Gritton ip->ip_src = inp->inp_laddr; 413b89e82ddSJamie Gritton error = prison_get_ip4(inp->inp_cred, &ip->ip_src); 414b89e82ddSJamie Gritton if (error != 0) { 415413628a7SBjoern A. Zeeb INP_RUNLOCK(inp); 416413628a7SBjoern A. Zeeb m_freem(m); 417b89e82ddSJamie Gritton return (error); 418413628a7SBjoern A. Zeeb } 419df8bae1dSRodney W. Grimes ip->ip_dst.s_addr = dst; 4208ce3f3ddSRuslan Ermilov ip->ip_ttl = inp->inp_ip_ttl; 421df8bae1dSRodney W. Grimes } else { 422430d30d8SBill Fenner if (m->m_pkthdr.len > IP_MAXPACKET) { 423430d30d8SBill Fenner m_freem(m); 424430d30d8SBill Fenner return(EMSGSIZE); 425430d30d8SBill Fenner } 4269ad11dd8SRobert Watson INP_RLOCK(inp); 427df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 428b89e82ddSJamie Gritton error = prison_check_ip4(inp->inp_cred, &ip->ip_src); 429b89e82ddSJamie Gritton if (error != 0) { 4309ad11dd8SRobert Watson INP_RUNLOCK(inp); 4315a59cefcSBosko Milekic m_freem(m); 432b89e82ddSJamie Gritton return (error); 4335a59cefcSBosko Milekic } 4340ae76120SRobert Watson 4350ae76120SRobert Watson /* 4360ae76120SRobert Watson * Don't allow both user specified and setsockopt options, 4370ae76120SRobert Watson * and don't allow packet length sizes that will crash. 4380ae76120SRobert Watson */ 4390ae76120SRobert Watson if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) 44091108995SBill Fenner || (ip->ip_len > m->m_pkthdr.len) 44153be11f6SPoul-Henning Kamp || (ip->ip_len < (ip->ip_hl << 2))) { 4429ad11dd8SRobert Watson INP_RUNLOCK(inp); 443072b9b24SPaul Traina m_freem(m); 4440ae76120SRobert Watson return (EINVAL); 445072b9b24SPaul Traina } 446df8bae1dSRodney W. Grimes if (ip->ip_id == 0) 4471f44b0a1SDavid Malone ip->ip_id = ip_newid(); 4480ae76120SRobert Watson 4490ae76120SRobert Watson /* 4500ae76120SRobert Watson * XXX prevent ip_output from overwriting header fields. 4510ae76120SRobert Watson */ 452df8bae1dSRodney W. Grimes flags |= IP_RAWOUTPUT; 453603724d3SBjoern A. Zeeb V_ipstat.ips_rawout++; 454df8bae1dSRodney W. Grimes } 4556a800098SYoshinobu Inoue 4566fbfd582SAndre Oppermann if (inp->inp_flags & INP_ONESBCAST) 4578afa2304SBruce M Simpson flags |= IP_SENDONES; 4588afa2304SBruce M Simpson 459ac830b58SBosko Milekic #ifdef MAC 46030d239bcSRobert Watson mac_inpcb_create_mbuf(inp, m); 461ac830b58SBosko Milekic #endif 462ac830b58SBosko Milekic 463ac830b58SBosko Milekic error = ip_output(m, inp->inp_options, NULL, flags, 464ac830b58SBosko Milekic inp->inp_moptions, inp); 4659ad11dd8SRobert Watson INP_RUNLOCK(inp); 4660ae76120SRobert Watson return (error); 467df8bae1dSRodney W. Grimes } 468df8bae1dSRodney W. Grimes 469df8bae1dSRodney W. Grimes /* 470df8bae1dSRodney W. Grimes * Raw IP socket option processing. 47183503a92SRobert Watson * 4726c67b8b6SRobert Watson * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could 4736c67b8b6SRobert Watson * only be created by a privileged process, and as such, socket option 4746c67b8b6SRobert Watson * operations to manage system properties on any raw socket were allowed to 4756c67b8b6SRobert Watson * take place without explicit additional access control checks. However, 4766c67b8b6SRobert Watson * raw sockets can now also be created in jail(), and therefore explicit 4776c67b8b6SRobert Watson * checks are now required. Likewise, raw sockets can be used by a process 4786c67b8b6SRobert Watson * after it gives up privilege, so some caution is required. For options 4796c67b8b6SRobert Watson * passed down to the IP layer via ip_ctloutput(), checks are assumed to be 4806c67b8b6SRobert Watson * performed in ip_ctloutput() and therefore no check occurs here. 48102dd4b5cSRobert Watson * Unilaterally checking priv_check() here breaks normal IP socket option 4826c67b8b6SRobert Watson * operations on raw sockets. 4836c67b8b6SRobert Watson * 4846c67b8b6SRobert Watson * When adding new socket options here, make sure to add access control 4856c67b8b6SRobert Watson * checks here as necessary. 486df8bae1dSRodney W. Grimes */ 487df8bae1dSRodney W. Grimes int 4883b6dd5a9SSam Leffler rip_ctloutput(struct socket *so, struct sockopt *sopt) 489df8bae1dSRodney W. Grimes { 490cfe8b629SGarrett Wollman struct inpcb *inp = sotoinpcb(so); 491cfe8b629SGarrett Wollman int error, optval; 492df8bae1dSRodney W. Grimes 493bc97ba51SJulian Elischer if (sopt->sopt_level != IPPROTO_IP) { 494bc97ba51SJulian Elischer if ((sopt->sopt_level == SOL_SOCKET) && 495bc97ba51SJulian Elischer (sopt->sopt_name == SO_SETFIB)) { 496bc97ba51SJulian Elischer inp->inp_inc.inc_fibnum = so->so_fibnum; 497bc97ba51SJulian Elischer return (0); 498bc97ba51SJulian Elischer } 499df8bae1dSRodney W. Grimes return (EINVAL); 500bc97ba51SJulian Elischer } 501df8bae1dSRodney W. Grimes 50225f26ad8SGarrett Wollman error = 0; 503cfe8b629SGarrett Wollman switch (sopt->sopt_dir) { 504cfe8b629SGarrett Wollman case SOPT_GET: 505cfe8b629SGarrett Wollman switch (sopt->sopt_name) { 506cfe8b629SGarrett Wollman case IP_HDRINCL: 507cfe8b629SGarrett Wollman optval = inp->inp_flags & INP_HDRINCL; 508cfe8b629SGarrett Wollman error = sooptcopyout(sopt, &optval, sizeof optval); 509cfe8b629SGarrett Wollman break; 510df8bae1dSRodney W. Grimes 5117b109fa4SLuigi Rizzo case IP_FW_ADD: /* ADD actually returns the body... */ 51209bb5f75SPoul-Henning Kamp case IP_FW_GET: 513cd8b5ae0SRuslan Ermilov case IP_FW_TABLE_GETSIZE: 514cd8b5ae0SRuslan Ermilov case IP_FW_TABLE_LIST: 515ff2f6fe8SPaolo Pisati case IP_FW_NAT_GET_CONFIG: 516ff2f6fe8SPaolo Pisati case IP_FW_NAT_GET_LOG: 5179b932e9eSAndre Oppermann if (ip_fw_ctl_ptr != NULL) 518cfe8b629SGarrett Wollman error = ip_fw_ctl_ptr(sopt); 5197b109fa4SLuigi Rizzo else 5207b109fa4SLuigi Rizzo error = ENOPROTOOPT; 521cfe8b629SGarrett Wollman break; 5224dd1662bSUgen J.S. Antsilevich 523b715f178SLuigi Rizzo case IP_DUMMYNET_GET: 5249b932e9eSAndre Oppermann if (ip_dn_ctl_ptr != NULL) 525b715f178SLuigi Rizzo error = ip_dn_ctl_ptr(sopt); 5267b109fa4SLuigi Rizzo else 5277b109fa4SLuigi Rizzo error = ENOPROTOOPT; 528b715f178SLuigi Rizzo break ; 5291c5de19aSGarrett Wollman 5301c5de19aSGarrett Wollman case MRT_INIT: 5311c5de19aSGarrett Wollman case MRT_DONE: 5321c5de19aSGarrett Wollman case MRT_ADD_VIF: 5331c5de19aSGarrett Wollman case MRT_DEL_VIF: 5341c5de19aSGarrett Wollman case MRT_ADD_MFC: 5351c5de19aSGarrett Wollman case MRT_DEL_MFC: 5361c5de19aSGarrett Wollman case MRT_VERSION: 5371c5de19aSGarrett Wollman case MRT_ASSERT: 5381e78ac21SJeffrey Hsu case MRT_API_SUPPORT: 5391e78ac21SJeffrey Hsu case MRT_API_CONFIG: 5401e78ac21SJeffrey Hsu case MRT_ADD_BW_UPCALL: 5411e78ac21SJeffrey Hsu case MRT_DEL_BW_UPCALL: 542acd3428bSRobert Watson error = priv_check(curthread, PRIV_NETINET_MROUTE); 5436c67b8b6SRobert Watson if (error != 0) 5446c67b8b6SRobert Watson return (error); 545bbb4330bSLuigi Rizzo error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : 546bbb4330bSLuigi Rizzo EOPNOTSUPP; 547cfe8b629SGarrett Wollman break; 548cfe8b629SGarrett Wollman 549cfe8b629SGarrett Wollman default: 550cfe8b629SGarrett Wollman error = ip_ctloutput(so, sopt); 551cfe8b629SGarrett Wollman break; 552df8bae1dSRodney W. Grimes } 553cfe8b629SGarrett Wollman break; 554cfe8b629SGarrett Wollman 555cfe8b629SGarrett Wollman case SOPT_SET: 556cfe8b629SGarrett Wollman switch (sopt->sopt_name) { 557cfe8b629SGarrett Wollman case IP_HDRINCL: 558cfe8b629SGarrett Wollman error = sooptcopyin(sopt, &optval, sizeof optval, 559cfe8b629SGarrett Wollman sizeof optval); 560cfe8b629SGarrett Wollman if (error) 561cfe8b629SGarrett Wollman break; 562cfe8b629SGarrett Wollman if (optval) 563cfe8b629SGarrett Wollman inp->inp_flags |= INP_HDRINCL; 564cfe8b629SGarrett Wollman else 565cfe8b629SGarrett Wollman inp->inp_flags &= ~INP_HDRINCL; 566cfe8b629SGarrett Wollman break; 567cfe8b629SGarrett Wollman 5688ba03966SRuslan Ermilov case IP_FW_ADD: 569cfe8b629SGarrett Wollman case IP_FW_DEL: 570cfe8b629SGarrett Wollman case IP_FW_FLUSH: 571cfe8b629SGarrett Wollman case IP_FW_ZERO: 5720b6c1a83SBrian Feldman case IP_FW_RESETLOG: 573cd8b5ae0SRuslan Ermilov case IP_FW_TABLE_ADD: 574cd8b5ae0SRuslan Ermilov case IP_FW_TABLE_DEL: 575cd8b5ae0SRuslan Ermilov case IP_FW_TABLE_FLUSH: 576ff2f6fe8SPaolo Pisati case IP_FW_NAT_CFG: 577ff2f6fe8SPaolo Pisati case IP_FW_NAT_DEL: 5789b932e9eSAndre Oppermann if (ip_fw_ctl_ptr != NULL) 579cfe8b629SGarrett Wollman error = ip_fw_ctl_ptr(sopt); 5807b109fa4SLuigi Rizzo else 5817b109fa4SLuigi Rizzo error = ENOPROTOOPT; 582cfe8b629SGarrett Wollman break; 583cfe8b629SGarrett Wollman 584b715f178SLuigi Rizzo case IP_DUMMYNET_CONFIGURE: 585b715f178SLuigi Rizzo case IP_DUMMYNET_DEL: 586b715f178SLuigi Rizzo case IP_DUMMYNET_FLUSH: 5879b932e9eSAndre Oppermann if (ip_dn_ctl_ptr != NULL) 588b715f178SLuigi Rizzo error = ip_dn_ctl_ptr(sopt); 5897b109fa4SLuigi Rizzo else 5907b109fa4SLuigi Rizzo error = ENOPROTOOPT ; 591b715f178SLuigi Rizzo break ; 592cfe8b629SGarrett Wollman 593cfe8b629SGarrett Wollman case IP_RSVP_ON: 594acd3428bSRobert Watson error = priv_check(curthread, PRIV_NETINET_MROUTE); 5956c67b8b6SRobert Watson if (error != 0) 5966c67b8b6SRobert Watson return (error); 597cfe8b629SGarrett Wollman error = ip_rsvp_init(so); 598cfe8b629SGarrett Wollman break; 599cfe8b629SGarrett Wollman 600cfe8b629SGarrett Wollman case IP_RSVP_OFF: 601acd3428bSRobert Watson error = priv_check(curthread, PRIV_NETINET_MROUTE); 6026c67b8b6SRobert Watson if (error != 0) 6036c67b8b6SRobert Watson return (error); 604cfe8b629SGarrett Wollman error = ip_rsvp_done(); 605cfe8b629SGarrett Wollman break; 606cfe8b629SGarrett Wollman 607cfe8b629SGarrett Wollman case IP_RSVP_VIF_ON: 608cfe8b629SGarrett Wollman case IP_RSVP_VIF_OFF: 609acd3428bSRobert Watson error = priv_check(curthread, PRIV_NETINET_MROUTE); 6106c67b8b6SRobert Watson if (error != 0) 6116c67b8b6SRobert Watson return (error); 612bbb4330bSLuigi Rizzo error = ip_rsvp_vif ? 613bbb4330bSLuigi Rizzo ip_rsvp_vif(so, sopt) : EINVAL; 614cfe8b629SGarrett Wollman break; 615cfe8b629SGarrett Wollman 616cfe8b629SGarrett Wollman case MRT_INIT: 617cfe8b629SGarrett Wollman case MRT_DONE: 618cfe8b629SGarrett Wollman case MRT_ADD_VIF: 619cfe8b629SGarrett Wollman case MRT_DEL_VIF: 620cfe8b629SGarrett Wollman case MRT_ADD_MFC: 621cfe8b629SGarrett Wollman case MRT_DEL_MFC: 622cfe8b629SGarrett Wollman case MRT_VERSION: 623cfe8b629SGarrett Wollman case MRT_ASSERT: 6241e78ac21SJeffrey Hsu case MRT_API_SUPPORT: 6251e78ac21SJeffrey Hsu case MRT_API_CONFIG: 6261e78ac21SJeffrey Hsu case MRT_ADD_BW_UPCALL: 6271e78ac21SJeffrey Hsu case MRT_DEL_BW_UPCALL: 628acd3428bSRobert Watson error = priv_check(curthread, PRIV_NETINET_MROUTE); 6296c67b8b6SRobert Watson if (error != 0) 6306c67b8b6SRobert Watson return (error); 631bbb4330bSLuigi Rizzo error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : 632bbb4330bSLuigi Rizzo EOPNOTSUPP; 633cfe8b629SGarrett Wollman break; 634cfe8b629SGarrett Wollman 635cfe8b629SGarrett Wollman default: 636cfe8b629SGarrett Wollman error = ip_ctloutput(so, sopt); 637cfe8b629SGarrett Wollman break; 638cfe8b629SGarrett Wollman } 639cfe8b629SGarrett Wollman break; 640cfe8b629SGarrett Wollman } 641cfe8b629SGarrett Wollman 642cfe8b629SGarrett Wollman return (error); 643df8bae1dSRodney W. Grimes } 644df8bae1dSRodney W. Grimes 64539191c8eSGarrett Wollman /* 6460ae76120SRobert Watson * This function exists solely to receive the PRC_IFDOWN messages which are 6470ae76120SRobert Watson * sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, and calls 6480ae76120SRobert Watson * in_ifadown() to remove all routes corresponding to that address. It also 6490ae76120SRobert Watson * receives the PRC_IFUP messages from if_up() and reinstalls the interface 6500ae76120SRobert Watson * routes. 65139191c8eSGarrett Wollman */ 65239191c8eSGarrett Wollman void 6533b6dd5a9SSam Leffler rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) 65439191c8eSGarrett Wollman { 6558b615593SMarko Zec INIT_VNET_INET(curvnet); 65639191c8eSGarrett Wollman struct in_ifaddr *ia; 65739191c8eSGarrett Wollman struct ifnet *ifp; 65839191c8eSGarrett Wollman int err; 65939191c8eSGarrett Wollman int flags; 66039191c8eSGarrett Wollman 66139191c8eSGarrett Wollman switch (cmd) { 66239191c8eSGarrett Wollman case PRC_IFDOWN: 663603724d3SBjoern A. Zeeb TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 66439191c8eSGarrett Wollman if (ia->ia_ifa.ifa_addr == sa 66539191c8eSGarrett Wollman && (ia->ia_flags & IFA_ROUTE)) { 66639191c8eSGarrett Wollman /* 66739191c8eSGarrett Wollman * in_ifscrub kills the interface route. 66839191c8eSGarrett Wollman */ 66939191c8eSGarrett Wollman in_ifscrub(ia->ia_ifp, ia); 67039191c8eSGarrett Wollman /* 6710ae76120SRobert Watson * in_ifadown gets rid of all the rest of the 6720ae76120SRobert Watson * routes. This is not quite the right thing 6730ae76120SRobert Watson * to do, but at least if we are running a 6740ae76120SRobert Watson * routing process they will come back. 67539191c8eSGarrett Wollman */ 67691854268SRuslan Ermilov in_ifadown(&ia->ia_ifa, 0); 67739191c8eSGarrett Wollman break; 67839191c8eSGarrett Wollman } 67939191c8eSGarrett Wollman } 68039191c8eSGarrett Wollman break; 68139191c8eSGarrett Wollman 68239191c8eSGarrett Wollman case PRC_IFUP: 683603724d3SBjoern A. Zeeb TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 68439191c8eSGarrett Wollman if (ia->ia_ifa.ifa_addr == sa) 68539191c8eSGarrett Wollman break; 68639191c8eSGarrett Wollman } 68739191c8eSGarrett Wollman if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) 68839191c8eSGarrett Wollman return; 68939191c8eSGarrett Wollman flags = RTF_UP; 69039191c8eSGarrett Wollman ifp = ia->ia_ifa.ifa_ifp; 69139191c8eSGarrett Wollman 69239191c8eSGarrett Wollman if ((ifp->if_flags & IFF_LOOPBACK) 69339191c8eSGarrett Wollman || (ifp->if_flags & IFF_POINTOPOINT)) 69439191c8eSGarrett Wollman flags |= RTF_HOST; 69539191c8eSGarrett Wollman 69639191c8eSGarrett Wollman err = rtinit(&ia->ia_ifa, RTM_ADD, flags); 69739191c8eSGarrett Wollman if (err == 0) 69839191c8eSGarrett Wollman ia->ia_flags |= IFA_ROUTE; 69939191c8eSGarrett Wollman break; 70039191c8eSGarrett Wollman } 70139191c8eSGarrett Wollman } 70239191c8eSGarrett Wollman 703c7547d1aSBruce M Simpson u_long rip_sendspace = 9216; 704c7547d1aSBruce M Simpson u_long rip_recvspace = 9216; 705df8bae1dSRodney W. Grimes 706e59898ffSMaxime Henrion SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, 7073d177f46SBill Fumerola &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); 708e59898ffSMaxime Henrion SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, 7090ca2861fSRuslan Ermilov &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); 710117bcae7SGarrett Wollman 711117bcae7SGarrett Wollman static int 712b40ce416SJulian Elischer rip_attach(struct socket *so, int proto, struct thread *td) 713df8bae1dSRodney W. Grimes { 7148b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 715117bcae7SGarrett Wollman struct inpcb *inp; 7163b6dd5a9SSam Leffler int error; 717c1f8a6ceSDavid Greenman 718117bcae7SGarrett Wollman inp = sotoinpcb(so); 71914ba8addSRobert Watson KASSERT(inp == NULL, ("rip_attach: inp != NULL")); 72032f9753cSRobert Watson 72132f9753cSRobert Watson error = priv_check(td, PRIV_NETINET_RAW); 722acd3428bSRobert Watson if (error) 7230ae76120SRobert Watson return (error); 72414ba8addSRobert Watson if (proto >= IPPROTO_MAX || proto < 0) 7254d3ffc98SBill Fenner return EPROTONOSUPPORT; 7266a800098SYoshinobu Inoue error = soreserve(so, rip_sendspace, rip_recvspace); 72714ba8addSRobert Watson if (error) 7280ae76120SRobert Watson return (error); 729603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 730603724d3SBjoern A. Zeeb error = in_pcballoc(so, &V_ripcbinfo); 7313b6dd5a9SSam Leffler if (error) { 732603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 7330ae76120SRobert Watson return (error); 7343b6dd5a9SSam Leffler } 735df8bae1dSRodney W. Grimes inp = (struct inpcb *)so->so_pcb; 7366a800098SYoshinobu Inoue inp->inp_vflag |= INP_IPV4; 737ca98b82cSDavid Greenman inp->inp_ip_p = proto; 738603724d3SBjoern A. Zeeb inp->inp_ip_ttl = V_ip_defttl; 7399ed324c9SAlexander Motin rip_inshash(inp); 740603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 7418501a69cSRobert Watson INP_WUNLOCK(inp); 7420ae76120SRobert Watson return (0); 743df8bae1dSRodney W. Grimes } 744117bcae7SGarrett Wollman 74550d7c061SSam Leffler static void 746a152f8a3SRobert Watson rip_detach(struct socket *so) 74750d7c061SSam Leffler { 7488b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 749a152f8a3SRobert Watson struct inpcb *inp; 7503ca1570cSRobert Watson 751a152f8a3SRobert Watson inp = sotoinpcb(so); 752a152f8a3SRobert Watson KASSERT(inp != NULL, ("rip_detach: inp == NULL")); 753a152f8a3SRobert Watson KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, 754a152f8a3SRobert Watson ("rip_detach: not closed")); 75550d7c061SSam Leffler 756603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 7578501a69cSRobert Watson INP_WLOCK(inp); 7589ed324c9SAlexander Motin rip_delhash(inp); 759603724d3SBjoern A. Zeeb if (so == V_ip_mrouter && ip_mrouter_done) 76050d7c061SSam Leffler ip_mrouter_done(); 76150d7c061SSam Leffler if (ip_rsvp_force_done) 76250d7c061SSam Leffler ip_rsvp_force_done(so); 763603724d3SBjoern A. Zeeb if (so == V_ip_rsvpd) 76450d7c061SSam Leffler ip_rsvp_done(); 76550d7c061SSam Leffler in_pcbdetach(inp); 76614ba8addSRobert Watson in_pcbfree(inp); 767603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 76850d7c061SSam Leffler } 76950d7c061SSam Leffler 770bc725eafSRobert Watson static void 771a152f8a3SRobert Watson rip_dodisconnect(struct socket *so, struct inpcb *inp) 772117bcae7SGarrett Wollman { 77318f401c6SAlexander Motin 77418f401c6SAlexander Motin INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 7758501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 776a152f8a3SRobert Watson 7779ed324c9SAlexander Motin rip_delhash(inp); 778a152f8a3SRobert Watson inp->inp_faddr.s_addr = INADDR_ANY; 7799ed324c9SAlexander Motin rip_inshash(inp); 780a152f8a3SRobert Watson SOCK_LOCK(so); 781a152f8a3SRobert Watson so->so_state &= ~SS_ISCONNECTED; 782a152f8a3SRobert Watson SOCK_UNLOCK(so); 783117bcae7SGarrett Wollman } 784df8bae1dSRodney W. Grimes 785ac45e92fSRobert Watson static void 786117bcae7SGarrett Wollman rip_abort(struct socket *so) 787df8bae1dSRodney W. Grimes { 7888b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 78950d7c061SSam Leffler struct inpcb *inp; 79050d7c061SSam Leffler 79150d7c061SSam Leffler inp = sotoinpcb(so); 79214ba8addSRobert Watson KASSERT(inp != NULL, ("rip_abort: inp == NULL")); 793a152f8a3SRobert Watson 794603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 7958501a69cSRobert Watson INP_WLOCK(inp); 796a152f8a3SRobert Watson rip_dodisconnect(so, inp); 7978501a69cSRobert Watson INP_WUNLOCK(inp); 798603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 799a152f8a3SRobert Watson } 800a152f8a3SRobert Watson 801a152f8a3SRobert Watson static void 802a152f8a3SRobert Watson rip_close(struct socket *so) 803a152f8a3SRobert Watson { 8048b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 805a152f8a3SRobert Watson struct inpcb *inp; 806a152f8a3SRobert Watson 807a152f8a3SRobert Watson inp = sotoinpcb(so); 808a152f8a3SRobert Watson KASSERT(inp != NULL, ("rip_close: inp == NULL")); 809a152f8a3SRobert Watson 810603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 8118501a69cSRobert Watson INP_WLOCK(inp); 812a152f8a3SRobert Watson rip_dodisconnect(so, inp); 8138501a69cSRobert Watson INP_WUNLOCK(inp); 814603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 815117bcae7SGarrett Wollman } 816117bcae7SGarrett Wollman 817117bcae7SGarrett Wollman static int 818117bcae7SGarrett Wollman rip_disconnect(struct socket *so) 819117bcae7SGarrett Wollman { 8208b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 821eb16472fSMaxim Konovalov struct inpcb *inp; 822eb16472fSMaxim Konovalov 8234cc20ab1SSeigo Tanimura if ((so->so_state & SS_ISCONNECTED) == 0) 8240ae76120SRobert Watson return (ENOTCONN); 825eb16472fSMaxim Konovalov 826eb16472fSMaxim Konovalov inp = sotoinpcb(so); 827eb16472fSMaxim Konovalov KASSERT(inp != NULL, ("rip_disconnect: inp == NULL")); 8280ae76120SRobert Watson 829603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 8308501a69cSRobert Watson INP_WLOCK(inp); 831a152f8a3SRobert Watson rip_dodisconnect(so, inp); 8328501a69cSRobert Watson INP_WUNLOCK(inp); 833603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 83414ba8addSRobert Watson return (0); 835117bcae7SGarrett Wollman } 836117bcae7SGarrett Wollman 837117bcae7SGarrett Wollman static int 838b40ce416SJulian Elischer rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 839117bcae7SGarrett Wollman { 8408b615593SMarko Zec INIT_VNET_NET(so->so_vnet); 8418b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 84257bf258eSGarrett Wollman struct sockaddr_in *addr = (struct sockaddr_in *)nam; 84350d7c061SSam Leffler struct inpcb *inp; 844b89e82ddSJamie Gritton int error; 845df8bae1dSRodney W. Grimes 84657bf258eSGarrett Wollman if (nam->sa_len != sizeof(*addr)) 8470ae76120SRobert Watson return (EINVAL); 848117bcae7SGarrett Wollman 849b89e82ddSJamie Gritton error = prison_check_ip4(td->td_ucred, &addr->sin_addr); 850b89e82ddSJamie Gritton if (error != 0) 851b89e82ddSJamie Gritton return (error); 8525a59cefcSBosko Milekic 853603724d3SBjoern A. Zeeb if (TAILQ_EMPTY(&V_ifnet) || 85450d7c061SSam Leffler (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || 855032dcc76SLuigi Rizzo (addr->sin_addr.s_addr && 856117bcae7SGarrett Wollman ifa_ifwithaddr((struct sockaddr *)addr) == 0)) 8570ae76120SRobert Watson return (EADDRNOTAVAIL); 85850d7c061SSam Leffler 85950d7c061SSam Leffler inp = sotoinpcb(so); 86014ba8addSRobert Watson KASSERT(inp != NULL, ("rip_bind: inp == NULL")); 8610ae76120SRobert Watson 862603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 8638501a69cSRobert Watson INP_WLOCK(inp); 8649ed324c9SAlexander Motin rip_delhash(inp); 865df8bae1dSRodney W. Grimes inp->inp_laddr = addr->sin_addr; 8669ed324c9SAlexander Motin rip_inshash(inp); 8678501a69cSRobert Watson INP_WUNLOCK(inp); 868603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 8690ae76120SRobert Watson return (0); 870df8bae1dSRodney W. Grimes } 871117bcae7SGarrett Wollman 872117bcae7SGarrett Wollman static int 873b40ce416SJulian Elischer rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 874df8bae1dSRodney W. Grimes { 8758b615593SMarko Zec INIT_VNET_NET(so->so_vnet); 8768b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 87757bf258eSGarrett Wollman struct sockaddr_in *addr = (struct sockaddr_in *)nam; 87850d7c061SSam Leffler struct inpcb *inp; 879df8bae1dSRodney W. Grimes 88057bf258eSGarrett Wollman if (nam->sa_len != sizeof(*addr)) 8810ae76120SRobert Watson return (EINVAL); 882603724d3SBjoern A. Zeeb if (TAILQ_EMPTY(&V_ifnet)) 8830ae76120SRobert Watson return (EADDRNOTAVAIL); 88450d7c061SSam Leffler if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) 8850ae76120SRobert Watson return (EAFNOSUPPORT); 88650d7c061SSam Leffler 88750d7c061SSam Leffler inp = sotoinpcb(so); 88814ba8addSRobert Watson KASSERT(inp != NULL, ("rip_connect: inp == NULL")); 8890ae76120SRobert Watson 890603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 8918501a69cSRobert Watson INP_WLOCK(inp); 8929ed324c9SAlexander Motin rip_delhash(inp); 893df8bae1dSRodney W. Grimes inp->inp_faddr = addr->sin_addr; 8949ed324c9SAlexander Motin rip_inshash(inp); 895df8bae1dSRodney W. Grimes soisconnected(so); 8968501a69cSRobert Watson INP_WUNLOCK(inp); 897603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 8980ae76120SRobert Watson return (0); 899df8bae1dSRodney W. Grimes } 900df8bae1dSRodney W. Grimes 901117bcae7SGarrett Wollman static int 902117bcae7SGarrett Wollman rip_shutdown(struct socket *so) 903df8bae1dSRodney W. Grimes { 90450d7c061SSam Leffler struct inpcb *inp; 90550d7c061SSam Leffler 90650d7c061SSam Leffler inp = sotoinpcb(so); 90714ba8addSRobert Watson KASSERT(inp != NULL, ("rip_shutdown: inp == NULL")); 9080ae76120SRobert Watson 9098501a69cSRobert Watson INP_WLOCK(inp); 910117bcae7SGarrett Wollman socantsendmore(so); 9118501a69cSRobert Watson INP_WUNLOCK(inp); 9120ae76120SRobert Watson return (0); 913117bcae7SGarrett Wollman } 914117bcae7SGarrett Wollman 915117bcae7SGarrett Wollman static int 91657bf258eSGarrett Wollman rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 917b40ce416SJulian Elischer struct mbuf *control, struct thread *td) 918117bcae7SGarrett Wollman { 91950d7c061SSam Leffler struct inpcb *inp; 92050d7c061SSam Leffler u_long dst; 921df8bae1dSRodney W. Grimes 92250d7c061SSam Leffler inp = sotoinpcb(so); 92314ba8addSRobert Watson KASSERT(inp != NULL, ("rip_send: inp == NULL")); 9240ae76120SRobert Watson 92514ba8addSRobert Watson /* 92614ba8addSRobert Watson * Note: 'dst' reads below are unlocked. 92714ba8addSRobert Watson */ 928df8bae1dSRodney W. Grimes if (so->so_state & SS_ISCONNECTED) { 929df8bae1dSRodney W. Grimes if (nam) { 930117bcae7SGarrett Wollman m_freem(m); 9310ae76120SRobert Watson return (EISCONN); 932df8bae1dSRodney W. Grimes } 93314ba8addSRobert Watson dst = inp->inp_faddr.s_addr; /* Unlocked read. */ 934df8bae1dSRodney W. Grimes } else { 935df8bae1dSRodney W. Grimes if (nam == NULL) { 936117bcae7SGarrett Wollman m_freem(m); 9370ae76120SRobert Watson return (ENOTCONN); 938df8bae1dSRodney W. Grimes } 93957bf258eSGarrett Wollman dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; 940df8bae1dSRodney W. Grimes } 9410ae76120SRobert Watson return (rip_output(m, so, dst)); 942df8bae1dSRodney W. Grimes } 943df8bae1dSRodney W. Grimes 94498271db4SGarrett Wollman static int 94582d9ae4eSPoul-Henning Kamp rip_pcblist(SYSCTL_HANDLER_ARGS) 94698271db4SGarrett Wollman { 9478b615593SMarko Zec INIT_VNET_INET(curvnet); 9483b6dd5a9SSam Leffler int error, i, n; 94998271db4SGarrett Wollman struct inpcb *inp, **inp_list; 95098271db4SGarrett Wollman inp_gen_t gencnt; 95198271db4SGarrett Wollman struct xinpgen xig; 95298271db4SGarrett Wollman 95398271db4SGarrett Wollman /* 95498271db4SGarrett Wollman * The process of preparing the TCB list is too time-consuming and 95598271db4SGarrett Wollman * resource-intensive to repeat twice on every request. 95698271db4SGarrett Wollman */ 95798271db4SGarrett Wollman if (req->oldptr == 0) { 958603724d3SBjoern A. Zeeb n = V_ripcbinfo.ipi_count; 95998271db4SGarrett Wollman req->oldidx = 2 * (sizeof xig) 96098271db4SGarrett Wollman + (n + n/8) * sizeof(struct xinpcb); 9610ae76120SRobert Watson return (0); 96298271db4SGarrett Wollman } 96398271db4SGarrett Wollman 96498271db4SGarrett Wollman if (req->newptr != 0) 9650ae76120SRobert Watson return (EPERM); 96698271db4SGarrett Wollman 96798271db4SGarrett Wollman /* 96898271db4SGarrett Wollman * OK, now we're committed to doing something. 96998271db4SGarrett Wollman */ 970603724d3SBjoern A. Zeeb INP_INFO_RLOCK(&V_ripcbinfo); 971603724d3SBjoern A. Zeeb gencnt = V_ripcbinfo.ipi_gencnt; 972603724d3SBjoern A. Zeeb n = V_ripcbinfo.ipi_count; 973603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_ripcbinfo); 97498271db4SGarrett Wollman 97598271db4SGarrett Wollman xig.xig_len = sizeof xig; 97698271db4SGarrett Wollman xig.xig_count = n; 97798271db4SGarrett Wollman xig.xig_gen = gencnt; 97898271db4SGarrett Wollman xig.xig_sogen = so_gencnt; 97998271db4SGarrett Wollman error = SYSCTL_OUT(req, &xig, sizeof xig); 98098271db4SGarrett Wollman if (error) 9810ae76120SRobert Watson return (error); 98298271db4SGarrett Wollman 983a163d034SWarner Losh inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 98498271db4SGarrett Wollman if (inp_list == 0) 9850ae76120SRobert Watson return (ENOMEM); 98698271db4SGarrett Wollman 987603724d3SBjoern A. Zeeb INP_INFO_RLOCK(&V_ripcbinfo); 988603724d3SBjoern A. Zeeb for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; 989fc2ffbe6SPoul-Henning Kamp inp = LIST_NEXT(inp, inp_list)) { 9909ad11dd8SRobert Watson INP_RLOCK(inp); 991f34f3a70SSam Leffler if (inp->inp_gencnt <= gencnt && 992f08ef6c5SBjoern A. Zeeb cr_canseeinpcb(req->td->td_ucred, inp) == 0) { 9933b6dd5a9SSam Leffler /* XXX held references? */ 99498271db4SGarrett Wollman inp_list[i++] = inp; 99598271db4SGarrett Wollman } 9969ad11dd8SRobert Watson INP_RUNLOCK(inp); 9974787fd37SPaul Saab } 998603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_ripcbinfo); 99998271db4SGarrett Wollman n = i; 100098271db4SGarrett Wollman 100198271db4SGarrett Wollman error = 0; 100298271db4SGarrett Wollman for (i = 0; i < n; i++) { 100398271db4SGarrett Wollman inp = inp_list[i]; 10049ad11dd8SRobert Watson INP_RLOCK(inp); 100598271db4SGarrett Wollman if (inp->inp_gencnt <= gencnt) { 100698271db4SGarrett Wollman struct xinpcb xi; 10073bb87a6cSKip Macy 1008fd94099eSColin Percival bzero(&xi, sizeof(xi)); 100998271db4SGarrett Wollman xi.xi_len = sizeof xi; 101098271db4SGarrett Wollman /* XXX should avoid extra copy */ 101198271db4SGarrett Wollman bcopy(inp, &xi.xi_inp, sizeof *inp); 101298271db4SGarrett Wollman if (inp->inp_socket) 101398271db4SGarrett Wollman sotoxsocket(inp->inp_socket, &xi.xi_socket); 10149ad11dd8SRobert Watson INP_RUNLOCK(inp); 101598271db4SGarrett Wollman error = SYSCTL_OUT(req, &xi, sizeof xi); 1016d915b280SStephan Uphoff } else 10179ad11dd8SRobert Watson INP_RUNLOCK(inp); 101898271db4SGarrett Wollman } 101998271db4SGarrett Wollman if (!error) { 102098271db4SGarrett Wollman /* 10210ae76120SRobert Watson * Give the user an updated idea of our state. If the 10220ae76120SRobert Watson * generation differs from what we told her before, she knows 10230ae76120SRobert Watson * that something happened while we were processing this 10240ae76120SRobert Watson * request, and it might be necessary to retry. 102598271db4SGarrett Wollman */ 1026603724d3SBjoern A. Zeeb INP_INFO_RLOCK(&V_ripcbinfo); 1027603724d3SBjoern A. Zeeb xig.xig_gen = V_ripcbinfo.ipi_gencnt; 102898271db4SGarrett Wollman xig.xig_sogen = so_gencnt; 1029603724d3SBjoern A. Zeeb xig.xig_count = V_ripcbinfo.ipi_count; 1030603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_ripcbinfo); 103198271db4SGarrett Wollman error = SYSCTL_OUT(req, &xig, sizeof xig); 103298271db4SGarrett Wollman } 103398271db4SGarrett Wollman free(inp_list, M_TEMP); 10340ae76120SRobert Watson return (error); 103598271db4SGarrett Wollman } 103698271db4SGarrett Wollman 103798271db4SGarrett Wollman SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0, 103898271db4SGarrett Wollman rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); 103998271db4SGarrett Wollman 1040117bcae7SGarrett Wollman struct pr_usrreqs rip_usrreqs = { 1041756d52a1SPoul-Henning Kamp .pru_abort = rip_abort, 1042756d52a1SPoul-Henning Kamp .pru_attach = rip_attach, 1043756d52a1SPoul-Henning Kamp .pru_bind = rip_bind, 1044756d52a1SPoul-Henning Kamp .pru_connect = rip_connect, 1045756d52a1SPoul-Henning Kamp .pru_control = in_control, 1046756d52a1SPoul-Henning Kamp .pru_detach = rip_detach, 1047756d52a1SPoul-Henning Kamp .pru_disconnect = rip_disconnect, 104854d642bbSRobert Watson .pru_peeraddr = in_getpeeraddr, 1049756d52a1SPoul-Henning Kamp .pru_send = rip_send, 1050756d52a1SPoul-Henning Kamp .pru_shutdown = rip_shutdown, 105154d642bbSRobert Watson .pru_sockaddr = in_getsockaddr, 1052a152f8a3SRobert Watson .pru_sosetlabel = in_pcbsosetlabel, 1053a152f8a3SRobert Watson .pru_close = rip_close, 1054117bcae7SGarrett Wollman }; 1055