1c398230bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 30ae76120SRobert Watson * The Regents of the University of California. 40ae76120SRobert Watson * All rights reserved. 5df8bae1dSRodney W. Grimes * 6df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 7df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 8df8bae1dSRodney W. Grimes * are met: 9df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 10df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 11df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 12df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 13df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 14df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 15df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 16df8bae1dSRodney W. Grimes * without specific prior written permission. 17df8bae1dSRodney W. Grimes * 18df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28df8bae1dSRodney W. Grimes * SUCH DAMAGE. 29df8bae1dSRodney W. Grimes * 3025f26ad8SGarrett Wollman * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 31df8bae1dSRodney W. Grimes */ 32df8bae1dSRodney W. Grimes 334b421e2dSMike Silbersack #include <sys/cdefs.h> 344b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 354b421e2dSMike Silbersack 366a800098SYoshinobu Inoue #include "opt_inet6.h" 376a800098SYoshinobu Inoue #include "opt_ipsec.h" 384ea889c6SRobert Watson #include "opt_mac.h" 396a800098SYoshinobu Inoue 40df8bae1dSRodney W. Grimes #include <sys/param.h> 415a59cefcSBosko Milekic #include <sys/jail.h> 42117bcae7SGarrett Wollman #include <sys/kernel.h> 43960ed29cSSeigo Tanimura #include <sys/lock.h> 44df8bae1dSRodney W. Grimes #include <sys/malloc.h> 45df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 46acd3428bSRobert Watson #include <sys/priv.h> 474787fd37SPaul Saab #include <sys/proc.h> 48df8bae1dSRodney W. Grimes #include <sys/protosw.h> 49385195c0SMarko Zec #include <sys/rwlock.h> 50960ed29cSSeigo Tanimura #include <sys/signalvar.h> 51117bcae7SGarrett Wollman #include <sys/socket.h> 52df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 53960ed29cSSeigo Tanimura #include <sys/sx.h> 54117bcae7SGarrett Wollman #include <sys/sysctl.h> 55960ed29cSSeigo Tanimura #include <sys/systm.h> 56603724d3SBjoern A. Zeeb #include <sys/vimage.h> 578781d8e9SBruce Evans 5869c2d429SJeff Roberson #include <vm/uma.h> 59df8bae1dSRodney W. Grimes 60df8bae1dSRodney W. Grimes #include <net/if.h> 61df8bae1dSRodney W. Grimes #include <net/route.h> 624b79449eSBjoern A. Zeeb #include <net/vnet.h> 63df8bae1dSRodney W. Grimes 64df8bae1dSRodney W. Grimes #include <netinet/in.h> 65df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 66c1f8a6ceSDavid Greenman #include <netinet/in_pcb.h> 67c1f8a6ceSDavid Greenman #include <netinet/in_var.h> 68960ed29cSSeigo Tanimura #include <netinet/ip.h> 69df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 70df8bae1dSRodney W. Grimes #include <netinet/ip_mroute.h> 71df8bae1dSRodney W. Grimes 72100ba1a6SJordan K. Hubbard #include <netinet/ip_fw.h> 73db69a05dSPaul Saab #include <netinet/ip_dummynet.h> 744b79449eSBjoern A. Zeeb #include <netinet/vinet.h> 75100ba1a6SJordan K. Hubbard 76b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 77b9234fafSSam Leffler #include <netipsec/ipsec.h> 78b2630c29SGeorge V. Neville-Neil #endif /*IPSEC*/ 79b9234fafSSam Leffler 80aed55708SRobert Watson #include <security/mac/mac_framework.h> 81aed55708SRobert Watson 8244e33a07SMarko Zec #ifdef VIMAGE_GLOBALS 8382cd038dSYoshinobu Inoue struct inpcbhead ripcb; 8482cd038dSYoshinobu Inoue struct inpcbinfo ripcbinfo; 8544e33a07SMarko Zec #endif 86df8bae1dSRodney W. Grimes 87db69a05dSPaul Saab /* control hooks for ipfw and dummynet */ 889b932e9eSAndre Oppermann ip_fw_ctl_t *ip_fw_ctl_ptr = NULL; 899b932e9eSAndre Oppermann ip_dn_ctl_t *ip_dn_ctl_ptr = NULL; 90db69a05dSPaul Saab 91df8bae1dSRodney W. Grimes /* 920ae76120SRobert Watson * Hooks for multicast routing. They all default to NULL, so leave them not 930ae76120SRobert Watson * initialized and rely on BSS being set to 0. 94bbb4330bSLuigi Rizzo */ 95bbb4330bSLuigi Rizzo 960ae76120SRobert Watson /* 970ae76120SRobert Watson * The socket used to communicate with the multicast routing daemon. 980ae76120SRobert Watson */ 9944e33a07SMarko Zec #ifdef VIMAGE_GLOBALS 100bbb4330bSLuigi Rizzo struct socket *ip_mrouter; 10144e33a07SMarko Zec #endif 102bbb4330bSLuigi Rizzo 1030ae76120SRobert Watson /* 1040ae76120SRobert Watson * The various mrouter and rsvp functions. 1050ae76120SRobert Watson */ 106bbb4330bSLuigi Rizzo int (*ip_mrouter_set)(struct socket *, struct sockopt *); 107bbb4330bSLuigi Rizzo int (*ip_mrouter_get)(struct socket *, struct sockopt *); 108bbb4330bSLuigi Rizzo int (*ip_mrouter_done)(void); 109bbb4330bSLuigi Rizzo int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, 110bbb4330bSLuigi Rizzo struct ip_moptions *); 1118b07e49aSJulian Elischer int (*mrt_ioctl)(int, caddr_t, int); 112bbb4330bSLuigi Rizzo int (*legal_vif_num)(int); 113bbb4330bSLuigi Rizzo u_long (*ip_mcast_src)(int); 114bbb4330bSLuigi Rizzo 115bbb4330bSLuigi Rizzo void (*rsvp_input_p)(struct mbuf *m, int off); 116bbb4330bSLuigi Rizzo int (*ip_rsvp_vif)(struct socket *, struct sockopt *); 117bbb4330bSLuigi Rizzo void (*ip_rsvp_force_done)(struct socket *); 118bbb4330bSLuigi Rizzo 119bbb4330bSLuigi Rizzo /* 1209ed324c9SAlexander Motin * Hash functions 1219ed324c9SAlexander Motin */ 1229ed324c9SAlexander Motin 1239ed324c9SAlexander Motin #define INP_PCBHASH_RAW_SIZE 256 1249ed324c9SAlexander Motin #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \ 1259ed324c9SAlexander Motin (((proto) + (laddr) + (faddr)) % (mask) + 1) 1269ed324c9SAlexander Motin 1279ed324c9SAlexander Motin static void 1289ed324c9SAlexander Motin rip_inshash(struct inpcb *inp) 1299ed324c9SAlexander Motin { 1309ed324c9SAlexander Motin struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1319ed324c9SAlexander Motin struct inpcbhead *pcbhash; 1329ed324c9SAlexander Motin int hash; 1339ed324c9SAlexander Motin 1349ed324c9SAlexander Motin INP_INFO_WLOCK_ASSERT(pcbinfo); 1359ed324c9SAlexander Motin INP_WLOCK_ASSERT(inp); 1369ed324c9SAlexander Motin 13718f401c6SAlexander Motin if (inp->inp_ip_p != 0 && 13818f401c6SAlexander Motin inp->inp_laddr.s_addr != INADDR_ANY && 13918f401c6SAlexander Motin inp->inp_faddr.s_addr != INADDR_ANY) { 1409ed324c9SAlexander Motin hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr, 1419ed324c9SAlexander Motin inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask); 14218f401c6SAlexander Motin } else 1439ed324c9SAlexander Motin hash = 0; 1449ed324c9SAlexander Motin pcbhash = &pcbinfo->ipi_hashbase[hash]; 1459ed324c9SAlexander Motin LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1469ed324c9SAlexander Motin } 1479ed324c9SAlexander Motin 1489ed324c9SAlexander Motin static void 1499ed324c9SAlexander Motin rip_delhash(struct inpcb *inp) 1509ed324c9SAlexander Motin { 15118f401c6SAlexander Motin 15218f401c6SAlexander Motin INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 1539ed324c9SAlexander Motin INP_WLOCK_ASSERT(inp); 15418f401c6SAlexander Motin 1559ed324c9SAlexander Motin LIST_REMOVE(inp, inp_hash); 1569ed324c9SAlexander Motin } 1579ed324c9SAlexander Motin 1589ed324c9SAlexander Motin /* 159df8bae1dSRodney W. Grimes * Raw interface to IP protocol. 160df8bae1dSRodney W. Grimes */ 161df8bae1dSRodney W. Grimes 162df8bae1dSRodney W. Grimes /* 163032dcc76SLuigi Rizzo * Initialize raw connection block q. 164df8bae1dSRodney W. Grimes */ 1654f590175SPaul Saab static void 1664f590175SPaul Saab rip_zone_change(void *tag) 1674f590175SPaul Saab { 1688b615593SMarko Zec INIT_VNET_INET(curvnet); 1694f590175SPaul Saab 170603724d3SBjoern A. Zeeb uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets); 1714f590175SPaul Saab } 1724f590175SPaul Saab 173d915b280SStephan Uphoff static int 174d915b280SStephan Uphoff rip_inpcb_init(void *mem, int size, int flags) 175d915b280SStephan Uphoff { 17608651e1fSJohn Baldwin struct inpcb *inp = mem; 17708651e1fSJohn Baldwin 178d915b280SStephan Uphoff INP_LOCK_INIT(inp, "inp", "rawinp"); 179d915b280SStephan Uphoff return (0); 180d915b280SStephan Uphoff } 181d915b280SStephan Uphoff 182df8bae1dSRodney W. Grimes void 183f2565d68SRobert Watson rip_init(void) 184df8bae1dSRodney W. Grimes { 1858b615593SMarko Zec INIT_VNET_INET(curvnet); 186f2565d68SRobert Watson 187603724d3SBjoern A. Zeeb INP_INFO_LOCK_INIT(&V_ripcbinfo, "rip"); 188603724d3SBjoern A. Zeeb LIST_INIT(&V_ripcb); 189603724d3SBjoern A. Zeeb V_ripcbinfo.ipi_listhead = &V_ripcb; 190ac957cd2SJulian Elischer V_ripcbinfo.ipi_hashbase = 191ac957cd2SJulian Elischer hashinit(INP_PCBHASH_RAW_SIZE, M_PCB, &V_ripcbinfo.ipi_hashmask); 192ac957cd2SJulian Elischer V_ripcbinfo.ipi_porthashbase = 193ac957cd2SJulian Elischer hashinit(1, M_PCB, &V_ripcbinfo.ipi_porthashmask); 194603724d3SBjoern A. Zeeb V_ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb), 195d915b280SStephan Uphoff NULL, NULL, rip_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 196603724d3SBjoern A. Zeeb uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets); 1970ae76120SRobert Watson EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, 1980ae76120SRobert Watson EVENTHANDLER_PRI_ANY); 199df8bae1dSRodney W. Grimes } 200df8bae1dSRodney W. Grimes 2013b6dd5a9SSam Leffler static int 2023b19fa35SRobert Watson rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, 2033b19fa35SRobert Watson struct sockaddr_in *ripsrc) 2043b6dd5a9SSam Leffler { 2054ea889c6SRobert Watson int policyfail = 0; 20633841545SHajimu UMEMOTO 2079ad11dd8SRobert Watson INP_RLOCK_ASSERT(last); 208cbe42d48SRobert Watson 209b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 210da0f4099SHajimu UMEMOTO /* check AH/ESP integrity. */ 211da0f4099SHajimu UMEMOTO if (ipsec4_in_reject(n, last)) { 212da0f4099SHajimu UMEMOTO policyfail = 1; 213b9234fafSSam Leffler } 214b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 2154ea889c6SRobert Watson #ifdef MAC 21630d239bcSRobert Watson if (!policyfail && mac_inpcb_check_deliver(last, n) != 0) 2174ea889c6SRobert Watson policyfail = 1; 2184ea889c6SRobert Watson #endif 219936cd18dSAndre Oppermann /* Check the minimum TTL for socket. */ 220936cd18dSAndre Oppermann if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl) 221936cd18dSAndre Oppermann policyfail = 1; 2223b6dd5a9SSam Leffler if (!policyfail) { 2233b6dd5a9SSam Leffler struct mbuf *opts = NULL; 2241e4d7da7SRobert Watson struct socket *so; 2253b6dd5a9SSam Leffler 2261e4d7da7SRobert Watson so = last->inp_socket; 2273b6dd5a9SSam Leffler if ((last->inp_flags & INP_CONTROLOPTS) || 2281fd7af26SAndre Oppermann (so->so_options & (SO_TIMESTAMP | SO_BINTIME))) 22982c23ebaSBill Fenner ip_savecontrol(last, &opts, ip, n); 2301e4d7da7SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 2311e4d7da7SRobert Watson if (sbappendaddr_locked(&so->so_rcv, 2323b19fa35SRobert Watson (struct sockaddr *)ripsrc, n, opts) == 0) { 233df8bae1dSRodney W. Grimes /* should notify about lost packet */ 234df8bae1dSRodney W. Grimes m_freem(n); 23582c23ebaSBill Fenner if (opts) 23682c23ebaSBill Fenner m_freem(opts); 2371e4d7da7SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 2384cc20ab1SSeigo Tanimura } else 2391e4d7da7SRobert Watson sorwakeup_locked(so); 2403b6dd5a9SSam Leffler } else 2413b6dd5a9SSam Leffler m_freem(n); 2420ae76120SRobert Watson return (policyfail); 243df8bae1dSRodney W. Grimes } 2443b6dd5a9SSam Leffler 2453b6dd5a9SSam Leffler /* 2460ae76120SRobert Watson * Setup generic address and protocol structures for raw_input routine, then 2470ae76120SRobert Watson * pass them along with mbuf chain. 2483b6dd5a9SSam Leffler */ 2493b6dd5a9SSam Leffler void 2503b6dd5a9SSam Leffler rip_input(struct mbuf *m, int off) 2513b6dd5a9SSam Leffler { 2528b615593SMarko Zec INIT_VNET_INET(curvnet); 2533b6dd5a9SSam Leffler struct ip *ip = mtod(m, struct ip *); 2543b6dd5a9SSam Leffler int proto = ip->ip_p; 2553b6dd5a9SSam Leffler struct inpcb *inp, *last; 2563b19fa35SRobert Watson struct sockaddr_in ripsrc; 2579ed324c9SAlexander Motin int hash; 2583b6dd5a9SSam Leffler 2593b19fa35SRobert Watson bzero(&ripsrc, sizeof(ripsrc)); 2603b19fa35SRobert Watson ripsrc.sin_len = sizeof(ripsrc); 2613b19fa35SRobert Watson ripsrc.sin_family = AF_INET; 2623b6dd5a9SSam Leffler ripsrc.sin_addr = ip->ip_src; 2633b6dd5a9SSam Leffler last = NULL; 2649ed324c9SAlexander Motin hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, 265603724d3SBjoern A. Zeeb ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); 266603724d3SBjoern A. Zeeb INP_INFO_RLOCK(&V_ripcbinfo); 267603724d3SBjoern A. Zeeb LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { 2680ca3b096SAlexander Motin if (inp->inp_ip_p != proto) 2690ca3b096SAlexander Motin continue; 2700ca3b096SAlexander Motin #ifdef INET6 27186d02c5cSBjoern A. Zeeb /* XXX inp locking */ 2720ca3b096SAlexander Motin if ((inp->inp_vflag & INP_IPV4) == 0) 2730ca3b096SAlexander Motin continue; 2740ca3b096SAlexander Motin #endif 2750ca3b096SAlexander Motin if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 2760ca3b096SAlexander Motin continue; 2770ca3b096SAlexander Motin if (inp->inp_faddr.s_addr != ip->ip_src.s_addr) 2780ca3b096SAlexander Motin continue; 279413628a7SBjoern A. Zeeb if (jailed(inp->inp_cred)) { 280413628a7SBjoern A. Zeeb if (!prison_check_ip4(inp->inp_cred, &ip->ip_dst)) 2819ed324c9SAlexander Motin continue; 2829ed324c9SAlexander Motin } 2833bb87a6cSKip Macy if (last != NULL) { 2849ed324c9SAlexander Motin struct mbuf *n; 2859ed324c9SAlexander Motin 2869ed324c9SAlexander Motin n = m_copy(m, 0, (int)M_COPYALL); 2879ed324c9SAlexander Motin if (n != NULL) 2889ed324c9SAlexander Motin (void) rip_append(last, ip, n, &ripsrc); 2899ed324c9SAlexander Motin /* XXX count dropped packet */ 2909ed324c9SAlexander Motin INP_RUNLOCK(last); 2919ed324c9SAlexander Motin } 29286d02c5cSBjoern A. Zeeb INP_RLOCK(inp); 2939ed324c9SAlexander Motin last = inp; 2949ed324c9SAlexander Motin } 295603724d3SBjoern A. Zeeb LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) { 2960ca3b096SAlexander Motin if (inp->inp_ip_p && inp->inp_ip_p != proto) 2973b6dd5a9SSam Leffler continue; 2983b6dd5a9SSam Leffler #ifdef INET6 29986d02c5cSBjoern A. Zeeb /* XXX inp locking */ 3003b6dd5a9SSam Leffler if ((inp->inp_vflag & INP_IPV4) == 0) 3010ca3b096SAlexander Motin continue; 3023b6dd5a9SSam Leffler #endif 3033b6dd5a9SSam Leffler if (inp->inp_laddr.s_addr && 3043b6dd5a9SSam Leffler inp->inp_laddr.s_addr != ip->ip_dst.s_addr) 3050ca3b096SAlexander Motin continue; 3063b6dd5a9SSam Leffler if (inp->inp_faddr.s_addr && 3073b6dd5a9SSam Leffler inp->inp_faddr.s_addr != ip->ip_src.s_addr) 3080ca3b096SAlexander Motin continue; 309413628a7SBjoern A. Zeeb if (jailed(inp->inp_cred)) { 310413628a7SBjoern A. Zeeb if (!prison_check_ip4(inp->inp_cred, &ip->ip_dst)) 3110ca3b096SAlexander Motin continue; 3120ca3b096SAlexander Motin } 3133bb87a6cSKip Macy if (last != NULL) { 3143b6dd5a9SSam Leffler struct mbuf *n; 3153b6dd5a9SSam Leffler 3163b6dd5a9SSam Leffler n = m_copy(m, 0, (int)M_COPYALL); 3173b6dd5a9SSam Leffler if (n != NULL) 3183b19fa35SRobert Watson (void) rip_append(last, ip, n, &ripsrc); 3193b6dd5a9SSam Leffler /* XXX count dropped packet */ 3209ad11dd8SRobert Watson INP_RUNLOCK(last); 321df8bae1dSRodney W. Grimes } 32286d02c5cSBjoern A. Zeeb INP_RLOCK(inp); 32382c23ebaSBill Fenner last = inp; 324df8bae1dSRodney W. Grimes } 325603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_ripcbinfo); 3263b6dd5a9SSam Leffler if (last != NULL) { 3273b19fa35SRobert Watson if (rip_append(last, ip, m, &ripsrc) != 0) 328603724d3SBjoern A. Zeeb V_ipstat.ips_delivered--; 3299ad11dd8SRobert Watson INP_RUNLOCK(last); 330df8bae1dSRodney W. Grimes } else { 331df8bae1dSRodney W. Grimes m_freem(m); 332603724d3SBjoern A. Zeeb V_ipstat.ips_noproto++; 333603724d3SBjoern A. Zeeb V_ipstat.ips_delivered--; 334df8bae1dSRodney W. Grimes } 335df8bae1dSRodney W. Grimes } 336df8bae1dSRodney W. Grimes 337df8bae1dSRodney W. Grimes /* 3380ae76120SRobert Watson * Generate IP header and pass packet to ip_output. Tack on options user may 3390ae76120SRobert Watson * have setup with control call. 340df8bae1dSRodney W. Grimes */ 341df8bae1dSRodney W. Grimes int 3423b6dd5a9SSam Leffler rip_output(struct mbuf *m, struct socket *so, u_long dst) 343df8bae1dSRodney W. Grimes { 3448b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 3453b6dd5a9SSam Leffler struct ip *ip; 346ac830b58SBosko Milekic int error; 3473b6dd5a9SSam Leffler struct inpcb *inp = sotoinpcb(so); 348b5d47ff5SJohn-Mark Gurney int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) | 349b5d47ff5SJohn-Mark Gurney IP_ALLOWBROADCAST; 350df8bae1dSRodney W. Grimes 351df8bae1dSRodney W. Grimes /* 3520ae76120SRobert Watson * If the user handed us a complete IP packet, use it. Otherwise, 3530ae76120SRobert Watson * allocate an mbuf for a header and fill it in. 354df8bae1dSRodney W. Grimes */ 355df8bae1dSRodney W. Grimes if ((inp->inp_flags & INP_HDRINCL) == 0) { 356430d30d8SBill Fenner if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { 357430d30d8SBill Fenner m_freem(m); 358430d30d8SBill Fenner return(EMSGSIZE); 359430d30d8SBill Fenner } 3602d01d331SRobert Watson M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); 3616b48911bSRobert Watson if (m == NULL) 3626b48911bSRobert Watson return(ENOBUFS); 363ac830b58SBosko Milekic 3649ad11dd8SRobert Watson INP_RLOCK(inp); 365df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 3668ce3f3ddSRuslan Ermilov ip->ip_tos = inp->inp_ip_tos; 367b2828ad2SAndre Oppermann if (inp->inp_flags & INP_DONTFRAG) 368b2828ad2SAndre Oppermann ip->ip_off = IP_DF; 369b2828ad2SAndre Oppermann else 370df8bae1dSRodney W. Grimes ip->ip_off = 0; 371ca98b82cSDavid Greenman ip->ip_p = inp->inp_ip_p; 372df8bae1dSRodney W. Grimes ip->ip_len = m->m_pkthdr.len; 373413628a7SBjoern A. Zeeb if (jailed(inp->inp_cred)) { 374413628a7SBjoern A. Zeeb if (prison_getip4(inp->inp_cred, &ip->ip_src)) { 375413628a7SBjoern A. Zeeb INP_RUNLOCK(inp); 376413628a7SBjoern A. Zeeb m_freem(m); 377413628a7SBjoern A. Zeeb return (EPERM); 378413628a7SBjoern A. Zeeb } 379413628a7SBjoern A. Zeeb } else { 380df8bae1dSRodney W. Grimes ip->ip_src = inp->inp_laddr; 381413628a7SBjoern A. Zeeb } 382df8bae1dSRodney W. Grimes ip->ip_dst.s_addr = dst; 3838ce3f3ddSRuslan Ermilov ip->ip_ttl = inp->inp_ip_ttl; 384df8bae1dSRodney W. Grimes } else { 385430d30d8SBill Fenner if (m->m_pkthdr.len > IP_MAXPACKET) { 386430d30d8SBill Fenner m_freem(m); 387430d30d8SBill Fenner return(EMSGSIZE); 388430d30d8SBill Fenner } 3899ad11dd8SRobert Watson INP_RLOCK(inp); 390df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 391413628a7SBjoern A. Zeeb if (!prison_check_ip4(inp->inp_cred, &ip->ip_src)) { 3929ad11dd8SRobert Watson INP_RUNLOCK(inp); 3935a59cefcSBosko Milekic m_freem(m); 3945a59cefcSBosko Milekic return (EPERM); 3955a59cefcSBosko Milekic } 3960ae76120SRobert Watson 3970ae76120SRobert Watson /* 3980ae76120SRobert Watson * Don't allow both user specified and setsockopt options, 3990ae76120SRobert Watson * and don't allow packet length sizes that will crash. 4000ae76120SRobert Watson */ 4010ae76120SRobert Watson if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) 40291108995SBill Fenner || (ip->ip_len > m->m_pkthdr.len) 40353be11f6SPoul-Henning Kamp || (ip->ip_len < (ip->ip_hl << 2))) { 4049ad11dd8SRobert Watson INP_RUNLOCK(inp); 405072b9b24SPaul Traina m_freem(m); 4060ae76120SRobert Watson return (EINVAL); 407072b9b24SPaul Traina } 408df8bae1dSRodney W. Grimes if (ip->ip_id == 0) 4091f44b0a1SDavid Malone ip->ip_id = ip_newid(); 4100ae76120SRobert Watson 4110ae76120SRobert Watson /* 4120ae76120SRobert Watson * XXX prevent ip_output from overwriting header fields. 4130ae76120SRobert Watson */ 414df8bae1dSRodney W. Grimes flags |= IP_RAWOUTPUT; 415603724d3SBjoern A. Zeeb V_ipstat.ips_rawout++; 416df8bae1dSRodney W. Grimes } 4176a800098SYoshinobu Inoue 4186fbfd582SAndre Oppermann if (inp->inp_flags & INP_ONESBCAST) 4198afa2304SBruce M Simpson flags |= IP_SENDONES; 4208afa2304SBruce M Simpson 421ac830b58SBosko Milekic #ifdef MAC 42230d239bcSRobert Watson mac_inpcb_create_mbuf(inp, m); 423ac830b58SBosko Milekic #endif 424ac830b58SBosko Milekic 425ac830b58SBosko Milekic error = ip_output(m, inp->inp_options, NULL, flags, 426ac830b58SBosko Milekic inp->inp_moptions, inp); 4279ad11dd8SRobert Watson INP_RUNLOCK(inp); 4280ae76120SRobert Watson return (error); 429df8bae1dSRodney W. Grimes } 430df8bae1dSRodney W. Grimes 431df8bae1dSRodney W. Grimes /* 432df8bae1dSRodney W. Grimes * Raw IP socket option processing. 43383503a92SRobert Watson * 4346c67b8b6SRobert Watson * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could 4356c67b8b6SRobert Watson * only be created by a privileged process, and as such, socket option 4366c67b8b6SRobert Watson * operations to manage system properties on any raw socket were allowed to 4376c67b8b6SRobert Watson * take place without explicit additional access control checks. However, 4386c67b8b6SRobert Watson * raw sockets can now also be created in jail(), and therefore explicit 4396c67b8b6SRobert Watson * checks are now required. Likewise, raw sockets can be used by a process 4406c67b8b6SRobert Watson * after it gives up privilege, so some caution is required. For options 4416c67b8b6SRobert Watson * passed down to the IP layer via ip_ctloutput(), checks are assumed to be 4426c67b8b6SRobert Watson * performed in ip_ctloutput() and therefore no check occurs here. 44302dd4b5cSRobert Watson * Unilaterally checking priv_check() here breaks normal IP socket option 4446c67b8b6SRobert Watson * operations on raw sockets. 4456c67b8b6SRobert Watson * 4466c67b8b6SRobert Watson * When adding new socket options here, make sure to add access control 4476c67b8b6SRobert Watson * checks here as necessary. 448df8bae1dSRodney W. Grimes */ 449df8bae1dSRodney W. Grimes int 4503b6dd5a9SSam Leffler rip_ctloutput(struct socket *so, struct sockopt *sopt) 451df8bae1dSRodney W. Grimes { 452cfe8b629SGarrett Wollman struct inpcb *inp = sotoinpcb(so); 453cfe8b629SGarrett Wollman int error, optval; 454df8bae1dSRodney W. Grimes 455bc97ba51SJulian Elischer if (sopt->sopt_level != IPPROTO_IP) { 456bc97ba51SJulian Elischer if ((sopt->sopt_level == SOL_SOCKET) && 457bc97ba51SJulian Elischer (sopt->sopt_name == SO_SETFIB)) { 458bc97ba51SJulian Elischer inp->inp_inc.inc_fibnum = so->so_fibnum; 459bc97ba51SJulian Elischer return (0); 460bc97ba51SJulian Elischer } 461df8bae1dSRodney W. Grimes return (EINVAL); 462bc97ba51SJulian Elischer } 463df8bae1dSRodney W. Grimes 46425f26ad8SGarrett Wollman error = 0; 465cfe8b629SGarrett Wollman switch (sopt->sopt_dir) { 466cfe8b629SGarrett Wollman case SOPT_GET: 467cfe8b629SGarrett Wollman switch (sopt->sopt_name) { 468cfe8b629SGarrett Wollman case IP_HDRINCL: 469cfe8b629SGarrett Wollman optval = inp->inp_flags & INP_HDRINCL; 470cfe8b629SGarrett Wollman error = sooptcopyout(sopt, &optval, sizeof optval); 471cfe8b629SGarrett Wollman break; 472df8bae1dSRodney W. Grimes 4737b109fa4SLuigi Rizzo case IP_FW_ADD: /* ADD actually returns the body... */ 47409bb5f75SPoul-Henning Kamp case IP_FW_GET: 475cd8b5ae0SRuslan Ermilov case IP_FW_TABLE_GETSIZE: 476cd8b5ae0SRuslan Ermilov case IP_FW_TABLE_LIST: 477ff2f6fe8SPaolo Pisati case IP_FW_NAT_GET_CONFIG: 478ff2f6fe8SPaolo Pisati case IP_FW_NAT_GET_LOG: 4799b932e9eSAndre Oppermann if (ip_fw_ctl_ptr != NULL) 480cfe8b629SGarrett Wollman error = ip_fw_ctl_ptr(sopt); 4817b109fa4SLuigi Rizzo else 4827b109fa4SLuigi Rizzo error = ENOPROTOOPT; 483cfe8b629SGarrett Wollman break; 4844dd1662bSUgen J.S. Antsilevich 485b715f178SLuigi Rizzo case IP_DUMMYNET_GET: 4869b932e9eSAndre Oppermann if (ip_dn_ctl_ptr != NULL) 487b715f178SLuigi Rizzo error = ip_dn_ctl_ptr(sopt); 4887b109fa4SLuigi Rizzo else 4897b109fa4SLuigi Rizzo error = ENOPROTOOPT; 490b715f178SLuigi Rizzo break ; 4911c5de19aSGarrett Wollman 4921c5de19aSGarrett Wollman case MRT_INIT: 4931c5de19aSGarrett Wollman case MRT_DONE: 4941c5de19aSGarrett Wollman case MRT_ADD_VIF: 4951c5de19aSGarrett Wollman case MRT_DEL_VIF: 4961c5de19aSGarrett Wollman case MRT_ADD_MFC: 4971c5de19aSGarrett Wollman case MRT_DEL_MFC: 4981c5de19aSGarrett Wollman case MRT_VERSION: 4991c5de19aSGarrett Wollman case MRT_ASSERT: 5001e78ac21SJeffrey Hsu case MRT_API_SUPPORT: 5011e78ac21SJeffrey Hsu case MRT_API_CONFIG: 5021e78ac21SJeffrey Hsu case MRT_ADD_BW_UPCALL: 5031e78ac21SJeffrey Hsu case MRT_DEL_BW_UPCALL: 504acd3428bSRobert Watson error = priv_check(curthread, PRIV_NETINET_MROUTE); 5056c67b8b6SRobert Watson if (error != 0) 5066c67b8b6SRobert Watson return (error); 507bbb4330bSLuigi Rizzo error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : 508bbb4330bSLuigi Rizzo EOPNOTSUPP; 509cfe8b629SGarrett Wollman break; 510cfe8b629SGarrett Wollman 511cfe8b629SGarrett Wollman default: 512cfe8b629SGarrett Wollman error = ip_ctloutput(so, sopt); 513cfe8b629SGarrett Wollman break; 514df8bae1dSRodney W. Grimes } 515cfe8b629SGarrett Wollman break; 516cfe8b629SGarrett Wollman 517cfe8b629SGarrett Wollman case SOPT_SET: 518cfe8b629SGarrett Wollman switch (sopt->sopt_name) { 519cfe8b629SGarrett Wollman case IP_HDRINCL: 520cfe8b629SGarrett Wollman error = sooptcopyin(sopt, &optval, sizeof optval, 521cfe8b629SGarrett Wollman sizeof optval); 522cfe8b629SGarrett Wollman if (error) 523cfe8b629SGarrett Wollman break; 524cfe8b629SGarrett Wollman if (optval) 525cfe8b629SGarrett Wollman inp->inp_flags |= INP_HDRINCL; 526cfe8b629SGarrett Wollman else 527cfe8b629SGarrett Wollman inp->inp_flags &= ~INP_HDRINCL; 528cfe8b629SGarrett Wollman break; 529cfe8b629SGarrett Wollman 5308ba03966SRuslan Ermilov case IP_FW_ADD: 531cfe8b629SGarrett Wollman case IP_FW_DEL: 532cfe8b629SGarrett Wollman case IP_FW_FLUSH: 533cfe8b629SGarrett Wollman case IP_FW_ZERO: 5340b6c1a83SBrian Feldman case IP_FW_RESETLOG: 535cd8b5ae0SRuslan Ermilov case IP_FW_TABLE_ADD: 536cd8b5ae0SRuslan Ermilov case IP_FW_TABLE_DEL: 537cd8b5ae0SRuslan Ermilov case IP_FW_TABLE_FLUSH: 538ff2f6fe8SPaolo Pisati case IP_FW_NAT_CFG: 539ff2f6fe8SPaolo Pisati case IP_FW_NAT_DEL: 5409b932e9eSAndre Oppermann if (ip_fw_ctl_ptr != NULL) 541cfe8b629SGarrett Wollman error = ip_fw_ctl_ptr(sopt); 5427b109fa4SLuigi Rizzo else 5437b109fa4SLuigi Rizzo error = ENOPROTOOPT; 544cfe8b629SGarrett Wollman break; 545cfe8b629SGarrett Wollman 546b715f178SLuigi Rizzo case IP_DUMMYNET_CONFIGURE: 547b715f178SLuigi Rizzo case IP_DUMMYNET_DEL: 548b715f178SLuigi Rizzo case IP_DUMMYNET_FLUSH: 5499b932e9eSAndre Oppermann if (ip_dn_ctl_ptr != NULL) 550b715f178SLuigi Rizzo error = ip_dn_ctl_ptr(sopt); 5517b109fa4SLuigi Rizzo else 5527b109fa4SLuigi Rizzo error = ENOPROTOOPT ; 553b715f178SLuigi Rizzo break ; 554cfe8b629SGarrett Wollman 555cfe8b629SGarrett Wollman case IP_RSVP_ON: 556acd3428bSRobert Watson error = priv_check(curthread, PRIV_NETINET_MROUTE); 5576c67b8b6SRobert Watson if (error != 0) 5586c67b8b6SRobert Watson return (error); 559cfe8b629SGarrett Wollman error = ip_rsvp_init(so); 560cfe8b629SGarrett Wollman break; 561cfe8b629SGarrett Wollman 562cfe8b629SGarrett Wollman case IP_RSVP_OFF: 563acd3428bSRobert Watson error = priv_check(curthread, PRIV_NETINET_MROUTE); 5646c67b8b6SRobert Watson if (error != 0) 5656c67b8b6SRobert Watson return (error); 566cfe8b629SGarrett Wollman error = ip_rsvp_done(); 567cfe8b629SGarrett Wollman break; 568cfe8b629SGarrett Wollman 569cfe8b629SGarrett Wollman case IP_RSVP_VIF_ON: 570cfe8b629SGarrett Wollman case IP_RSVP_VIF_OFF: 571acd3428bSRobert Watson error = priv_check(curthread, PRIV_NETINET_MROUTE); 5726c67b8b6SRobert Watson if (error != 0) 5736c67b8b6SRobert Watson return (error); 574bbb4330bSLuigi Rizzo error = ip_rsvp_vif ? 575bbb4330bSLuigi Rizzo ip_rsvp_vif(so, sopt) : EINVAL; 576cfe8b629SGarrett Wollman break; 577cfe8b629SGarrett Wollman 578cfe8b629SGarrett Wollman case MRT_INIT: 579cfe8b629SGarrett Wollman case MRT_DONE: 580cfe8b629SGarrett Wollman case MRT_ADD_VIF: 581cfe8b629SGarrett Wollman case MRT_DEL_VIF: 582cfe8b629SGarrett Wollman case MRT_ADD_MFC: 583cfe8b629SGarrett Wollman case MRT_DEL_MFC: 584cfe8b629SGarrett Wollman case MRT_VERSION: 585cfe8b629SGarrett Wollman case MRT_ASSERT: 5861e78ac21SJeffrey Hsu case MRT_API_SUPPORT: 5871e78ac21SJeffrey Hsu case MRT_API_CONFIG: 5881e78ac21SJeffrey Hsu case MRT_ADD_BW_UPCALL: 5891e78ac21SJeffrey Hsu case MRT_DEL_BW_UPCALL: 590acd3428bSRobert Watson error = priv_check(curthread, PRIV_NETINET_MROUTE); 5916c67b8b6SRobert Watson if (error != 0) 5926c67b8b6SRobert Watson return (error); 593bbb4330bSLuigi Rizzo error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : 594bbb4330bSLuigi Rizzo EOPNOTSUPP; 595cfe8b629SGarrett Wollman break; 596cfe8b629SGarrett Wollman 597cfe8b629SGarrett Wollman default: 598cfe8b629SGarrett Wollman error = ip_ctloutput(so, sopt); 599cfe8b629SGarrett Wollman break; 600cfe8b629SGarrett Wollman } 601cfe8b629SGarrett Wollman break; 602cfe8b629SGarrett Wollman } 603cfe8b629SGarrett Wollman 604cfe8b629SGarrett Wollman return (error); 605df8bae1dSRodney W. Grimes } 606df8bae1dSRodney W. Grimes 60739191c8eSGarrett Wollman /* 6080ae76120SRobert Watson * This function exists solely to receive the PRC_IFDOWN messages which are 6090ae76120SRobert Watson * sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, and calls 6100ae76120SRobert Watson * in_ifadown() to remove all routes corresponding to that address. It also 6110ae76120SRobert Watson * receives the PRC_IFUP messages from if_up() and reinstalls the interface 6120ae76120SRobert Watson * routes. 61339191c8eSGarrett Wollman */ 61439191c8eSGarrett Wollman void 6153b6dd5a9SSam Leffler rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) 61639191c8eSGarrett Wollman { 6178b615593SMarko Zec INIT_VNET_INET(curvnet); 61839191c8eSGarrett Wollman struct in_ifaddr *ia; 61939191c8eSGarrett Wollman struct ifnet *ifp; 62039191c8eSGarrett Wollman int err; 62139191c8eSGarrett Wollman int flags; 62239191c8eSGarrett Wollman 62339191c8eSGarrett Wollman switch (cmd) { 62439191c8eSGarrett Wollman case PRC_IFDOWN: 625603724d3SBjoern A. Zeeb TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 62639191c8eSGarrett Wollman if (ia->ia_ifa.ifa_addr == sa 62739191c8eSGarrett Wollman && (ia->ia_flags & IFA_ROUTE)) { 62839191c8eSGarrett Wollman /* 62939191c8eSGarrett Wollman * in_ifscrub kills the interface route. 63039191c8eSGarrett Wollman */ 63139191c8eSGarrett Wollman in_ifscrub(ia->ia_ifp, ia); 63239191c8eSGarrett Wollman /* 6330ae76120SRobert Watson * in_ifadown gets rid of all the rest of the 6340ae76120SRobert Watson * routes. This is not quite the right thing 6350ae76120SRobert Watson * to do, but at least if we are running a 6360ae76120SRobert Watson * routing process they will come back. 63739191c8eSGarrett Wollman */ 63891854268SRuslan Ermilov in_ifadown(&ia->ia_ifa, 0); 63939191c8eSGarrett Wollman break; 64039191c8eSGarrett Wollman } 64139191c8eSGarrett Wollman } 64239191c8eSGarrett Wollman break; 64339191c8eSGarrett Wollman 64439191c8eSGarrett Wollman case PRC_IFUP: 645603724d3SBjoern A. Zeeb TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 64639191c8eSGarrett Wollman if (ia->ia_ifa.ifa_addr == sa) 64739191c8eSGarrett Wollman break; 64839191c8eSGarrett Wollman } 64939191c8eSGarrett Wollman if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) 65039191c8eSGarrett Wollman return; 65139191c8eSGarrett Wollman flags = RTF_UP; 65239191c8eSGarrett Wollman ifp = ia->ia_ifa.ifa_ifp; 65339191c8eSGarrett Wollman 65439191c8eSGarrett Wollman if ((ifp->if_flags & IFF_LOOPBACK) 65539191c8eSGarrett Wollman || (ifp->if_flags & IFF_POINTOPOINT)) 65639191c8eSGarrett Wollman flags |= RTF_HOST; 65739191c8eSGarrett Wollman 65839191c8eSGarrett Wollman err = rtinit(&ia->ia_ifa, RTM_ADD, flags); 65939191c8eSGarrett Wollman if (err == 0) 66039191c8eSGarrett Wollman ia->ia_flags |= IFA_ROUTE; 66139191c8eSGarrett Wollman break; 66239191c8eSGarrett Wollman } 66339191c8eSGarrett Wollman } 66439191c8eSGarrett Wollman 665c7547d1aSBruce M Simpson u_long rip_sendspace = 9216; 666c7547d1aSBruce M Simpson u_long rip_recvspace = 9216; 667df8bae1dSRodney W. Grimes 668e59898ffSMaxime Henrion SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, 6693d177f46SBill Fumerola &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); 670e59898ffSMaxime Henrion SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, 6710ca2861fSRuslan Ermilov &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); 672117bcae7SGarrett Wollman 673117bcae7SGarrett Wollman static int 674b40ce416SJulian Elischer rip_attach(struct socket *so, int proto, struct thread *td) 675df8bae1dSRodney W. Grimes { 6768b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 677117bcae7SGarrett Wollman struct inpcb *inp; 6783b6dd5a9SSam Leffler int error; 679c1f8a6ceSDavid Greenman 680117bcae7SGarrett Wollman inp = sotoinpcb(so); 68114ba8addSRobert Watson KASSERT(inp == NULL, ("rip_attach: inp != NULL")); 68232f9753cSRobert Watson 68332f9753cSRobert Watson error = priv_check(td, PRIV_NETINET_RAW); 684acd3428bSRobert Watson if (error) 6850ae76120SRobert Watson return (error); 68614ba8addSRobert Watson if (proto >= IPPROTO_MAX || proto < 0) 6874d3ffc98SBill Fenner return EPROTONOSUPPORT; 6886a800098SYoshinobu Inoue error = soreserve(so, rip_sendspace, rip_recvspace); 68914ba8addSRobert Watson if (error) 6900ae76120SRobert Watson return (error); 691603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 692603724d3SBjoern A. Zeeb error = in_pcballoc(so, &V_ripcbinfo); 6933b6dd5a9SSam Leffler if (error) { 694603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 6950ae76120SRobert Watson return (error); 6963b6dd5a9SSam Leffler } 697df8bae1dSRodney W. Grimes inp = (struct inpcb *)so->so_pcb; 6986a800098SYoshinobu Inoue inp->inp_vflag |= INP_IPV4; 699ca98b82cSDavid Greenman inp->inp_ip_p = proto; 700603724d3SBjoern A. Zeeb inp->inp_ip_ttl = V_ip_defttl; 7019ed324c9SAlexander Motin rip_inshash(inp); 702603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 7038501a69cSRobert Watson INP_WUNLOCK(inp); 7040ae76120SRobert Watson return (0); 705df8bae1dSRodney W. Grimes } 706117bcae7SGarrett Wollman 70750d7c061SSam Leffler static void 708a152f8a3SRobert Watson rip_detach(struct socket *so) 70950d7c061SSam Leffler { 7108b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 711a152f8a3SRobert Watson struct inpcb *inp; 7123ca1570cSRobert Watson 713a152f8a3SRobert Watson inp = sotoinpcb(so); 714a152f8a3SRobert Watson KASSERT(inp != NULL, ("rip_detach: inp == NULL")); 715a152f8a3SRobert Watson KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, 716a152f8a3SRobert Watson ("rip_detach: not closed")); 71750d7c061SSam Leffler 718603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 7198501a69cSRobert Watson INP_WLOCK(inp); 7209ed324c9SAlexander Motin rip_delhash(inp); 721603724d3SBjoern A. Zeeb if (so == V_ip_mrouter && ip_mrouter_done) 72250d7c061SSam Leffler ip_mrouter_done(); 72350d7c061SSam Leffler if (ip_rsvp_force_done) 72450d7c061SSam Leffler ip_rsvp_force_done(so); 725603724d3SBjoern A. Zeeb if (so == V_ip_rsvpd) 72650d7c061SSam Leffler ip_rsvp_done(); 72750d7c061SSam Leffler in_pcbdetach(inp); 72814ba8addSRobert Watson in_pcbfree(inp); 729603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 73050d7c061SSam Leffler } 73150d7c061SSam Leffler 732bc725eafSRobert Watson static void 733a152f8a3SRobert Watson rip_dodisconnect(struct socket *so, struct inpcb *inp) 734117bcae7SGarrett Wollman { 73518f401c6SAlexander Motin 73618f401c6SAlexander Motin INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 7378501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 738a152f8a3SRobert Watson 7399ed324c9SAlexander Motin rip_delhash(inp); 740a152f8a3SRobert Watson inp->inp_faddr.s_addr = INADDR_ANY; 7419ed324c9SAlexander Motin rip_inshash(inp); 742a152f8a3SRobert Watson SOCK_LOCK(so); 743a152f8a3SRobert Watson so->so_state &= ~SS_ISCONNECTED; 744a152f8a3SRobert Watson SOCK_UNLOCK(so); 745117bcae7SGarrett Wollman } 746df8bae1dSRodney W. Grimes 747ac45e92fSRobert Watson static void 748117bcae7SGarrett Wollman rip_abort(struct socket *so) 749df8bae1dSRodney W. Grimes { 7508b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 75150d7c061SSam Leffler struct inpcb *inp; 75250d7c061SSam Leffler 75350d7c061SSam Leffler inp = sotoinpcb(so); 75414ba8addSRobert Watson KASSERT(inp != NULL, ("rip_abort: inp == NULL")); 755a152f8a3SRobert Watson 756603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 7578501a69cSRobert Watson INP_WLOCK(inp); 758a152f8a3SRobert Watson rip_dodisconnect(so, inp); 7598501a69cSRobert Watson INP_WUNLOCK(inp); 760603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 761a152f8a3SRobert Watson } 762a152f8a3SRobert Watson 763a152f8a3SRobert Watson static void 764a152f8a3SRobert Watson rip_close(struct socket *so) 765a152f8a3SRobert Watson { 7668b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 767a152f8a3SRobert Watson struct inpcb *inp; 768a152f8a3SRobert Watson 769a152f8a3SRobert Watson inp = sotoinpcb(so); 770a152f8a3SRobert Watson KASSERT(inp != NULL, ("rip_close: inp == NULL")); 771a152f8a3SRobert Watson 772603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 7738501a69cSRobert Watson INP_WLOCK(inp); 774a152f8a3SRobert Watson rip_dodisconnect(so, inp); 7758501a69cSRobert Watson INP_WUNLOCK(inp); 776603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 777117bcae7SGarrett Wollman } 778117bcae7SGarrett Wollman 779117bcae7SGarrett Wollman static int 780117bcae7SGarrett Wollman rip_disconnect(struct socket *so) 781117bcae7SGarrett Wollman { 7828b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 783eb16472fSMaxim Konovalov struct inpcb *inp; 784eb16472fSMaxim Konovalov 7854cc20ab1SSeigo Tanimura if ((so->so_state & SS_ISCONNECTED) == 0) 7860ae76120SRobert Watson return (ENOTCONN); 787eb16472fSMaxim Konovalov 788eb16472fSMaxim Konovalov inp = sotoinpcb(so); 789eb16472fSMaxim Konovalov KASSERT(inp != NULL, ("rip_disconnect: inp == NULL")); 7900ae76120SRobert Watson 791603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 7928501a69cSRobert Watson INP_WLOCK(inp); 793a152f8a3SRobert Watson rip_dodisconnect(so, inp); 7948501a69cSRobert Watson INP_WUNLOCK(inp); 795603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 79614ba8addSRobert Watson return (0); 797117bcae7SGarrett Wollman } 798117bcae7SGarrett Wollman 799117bcae7SGarrett Wollman static int 800b40ce416SJulian Elischer rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 801117bcae7SGarrett Wollman { 8028b615593SMarko Zec INIT_VNET_NET(so->so_vnet); 8038b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 80457bf258eSGarrett Wollman struct sockaddr_in *addr = (struct sockaddr_in *)nam; 80550d7c061SSam Leffler struct inpcb *inp; 806df8bae1dSRodney W. Grimes 80757bf258eSGarrett Wollman if (nam->sa_len != sizeof(*addr)) 8080ae76120SRobert Watson return (EINVAL); 809117bcae7SGarrett Wollman 810413628a7SBjoern A. Zeeb if (!prison_check_ip4(td->td_ucred, &addr->sin_addr)) 8115a59cefcSBosko Milekic return (EADDRNOTAVAIL); 8125a59cefcSBosko Milekic 813603724d3SBjoern A. Zeeb if (TAILQ_EMPTY(&V_ifnet) || 81450d7c061SSam Leffler (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || 815032dcc76SLuigi Rizzo (addr->sin_addr.s_addr && 816117bcae7SGarrett Wollman ifa_ifwithaddr((struct sockaddr *)addr) == 0)) 8170ae76120SRobert Watson return (EADDRNOTAVAIL); 81850d7c061SSam Leffler 81950d7c061SSam Leffler inp = sotoinpcb(so); 82014ba8addSRobert Watson KASSERT(inp != NULL, ("rip_bind: inp == NULL")); 8210ae76120SRobert Watson 822603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 8238501a69cSRobert Watson INP_WLOCK(inp); 8249ed324c9SAlexander Motin rip_delhash(inp); 825df8bae1dSRodney W. Grimes inp->inp_laddr = addr->sin_addr; 8269ed324c9SAlexander Motin rip_inshash(inp); 8278501a69cSRobert Watson INP_WUNLOCK(inp); 828603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 8290ae76120SRobert Watson return (0); 830df8bae1dSRodney W. Grimes } 831117bcae7SGarrett Wollman 832117bcae7SGarrett Wollman static int 833b40ce416SJulian Elischer rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 834df8bae1dSRodney W. Grimes { 8358b615593SMarko Zec INIT_VNET_NET(so->so_vnet); 8368b615593SMarko Zec INIT_VNET_INET(so->so_vnet); 83757bf258eSGarrett Wollman struct sockaddr_in *addr = (struct sockaddr_in *)nam; 83850d7c061SSam Leffler struct inpcb *inp; 839df8bae1dSRodney W. Grimes 84057bf258eSGarrett Wollman if (nam->sa_len != sizeof(*addr)) 8410ae76120SRobert Watson return (EINVAL); 842603724d3SBjoern A. Zeeb if (TAILQ_EMPTY(&V_ifnet)) 8430ae76120SRobert Watson return (EADDRNOTAVAIL); 84450d7c061SSam Leffler if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) 8450ae76120SRobert Watson return (EAFNOSUPPORT); 84650d7c061SSam Leffler 84750d7c061SSam Leffler inp = sotoinpcb(so); 84814ba8addSRobert Watson KASSERT(inp != NULL, ("rip_connect: inp == NULL")); 8490ae76120SRobert Watson 850603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_ripcbinfo); 8518501a69cSRobert Watson INP_WLOCK(inp); 8529ed324c9SAlexander Motin rip_delhash(inp); 853df8bae1dSRodney W. Grimes inp->inp_faddr = addr->sin_addr; 8549ed324c9SAlexander Motin rip_inshash(inp); 855df8bae1dSRodney W. Grimes soisconnected(so); 8568501a69cSRobert Watson INP_WUNLOCK(inp); 857603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_ripcbinfo); 8580ae76120SRobert Watson return (0); 859df8bae1dSRodney W. Grimes } 860df8bae1dSRodney W. Grimes 861117bcae7SGarrett Wollman static int 862117bcae7SGarrett Wollman rip_shutdown(struct socket *so) 863df8bae1dSRodney W. Grimes { 86450d7c061SSam Leffler struct inpcb *inp; 86550d7c061SSam Leffler 86650d7c061SSam Leffler inp = sotoinpcb(so); 86714ba8addSRobert Watson KASSERT(inp != NULL, ("rip_shutdown: inp == NULL")); 8680ae76120SRobert Watson 8698501a69cSRobert Watson INP_WLOCK(inp); 870117bcae7SGarrett Wollman socantsendmore(so); 8718501a69cSRobert Watson INP_WUNLOCK(inp); 8720ae76120SRobert Watson return (0); 873117bcae7SGarrett Wollman } 874117bcae7SGarrett Wollman 875117bcae7SGarrett Wollman static int 87657bf258eSGarrett Wollman rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 877b40ce416SJulian Elischer struct mbuf *control, struct thread *td) 878117bcae7SGarrett Wollman { 87950d7c061SSam Leffler struct inpcb *inp; 88050d7c061SSam Leffler u_long dst; 881df8bae1dSRodney W. Grimes 88250d7c061SSam Leffler inp = sotoinpcb(so); 88314ba8addSRobert Watson KASSERT(inp != NULL, ("rip_send: inp == NULL")); 8840ae76120SRobert Watson 88514ba8addSRobert Watson /* 88614ba8addSRobert Watson * Note: 'dst' reads below are unlocked. 88714ba8addSRobert Watson */ 888df8bae1dSRodney W. Grimes if (so->so_state & SS_ISCONNECTED) { 889df8bae1dSRodney W. Grimes if (nam) { 890117bcae7SGarrett Wollman m_freem(m); 8910ae76120SRobert Watson return (EISCONN); 892df8bae1dSRodney W. Grimes } 89314ba8addSRobert Watson dst = inp->inp_faddr.s_addr; /* Unlocked read. */ 894df8bae1dSRodney W. Grimes } else { 895df8bae1dSRodney W. Grimes if (nam == NULL) { 896117bcae7SGarrett Wollman m_freem(m); 8970ae76120SRobert Watson return (ENOTCONN); 898df8bae1dSRodney W. Grimes } 89957bf258eSGarrett Wollman dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; 900df8bae1dSRodney W. Grimes } 9010ae76120SRobert Watson return (rip_output(m, so, dst)); 902df8bae1dSRodney W. Grimes } 903df8bae1dSRodney W. Grimes 90498271db4SGarrett Wollman static int 90582d9ae4eSPoul-Henning Kamp rip_pcblist(SYSCTL_HANDLER_ARGS) 90698271db4SGarrett Wollman { 9078b615593SMarko Zec INIT_VNET_INET(curvnet); 9083b6dd5a9SSam Leffler int error, i, n; 90998271db4SGarrett Wollman struct inpcb *inp, **inp_list; 91098271db4SGarrett Wollman inp_gen_t gencnt; 91198271db4SGarrett Wollman struct xinpgen xig; 91298271db4SGarrett Wollman 91398271db4SGarrett Wollman /* 91498271db4SGarrett Wollman * The process of preparing the TCB list is too time-consuming and 91598271db4SGarrett Wollman * resource-intensive to repeat twice on every request. 91698271db4SGarrett Wollman */ 91798271db4SGarrett Wollman if (req->oldptr == 0) { 918603724d3SBjoern A. Zeeb n = V_ripcbinfo.ipi_count; 91998271db4SGarrett Wollman req->oldidx = 2 * (sizeof xig) 92098271db4SGarrett Wollman + (n + n/8) * sizeof(struct xinpcb); 9210ae76120SRobert Watson return (0); 92298271db4SGarrett Wollman } 92398271db4SGarrett Wollman 92498271db4SGarrett Wollman if (req->newptr != 0) 9250ae76120SRobert Watson return (EPERM); 92698271db4SGarrett Wollman 92798271db4SGarrett Wollman /* 92898271db4SGarrett Wollman * OK, now we're committed to doing something. 92998271db4SGarrett Wollman */ 930603724d3SBjoern A. Zeeb INP_INFO_RLOCK(&V_ripcbinfo); 931603724d3SBjoern A. Zeeb gencnt = V_ripcbinfo.ipi_gencnt; 932603724d3SBjoern A. Zeeb n = V_ripcbinfo.ipi_count; 933603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_ripcbinfo); 93498271db4SGarrett Wollman 93598271db4SGarrett Wollman xig.xig_len = sizeof xig; 93698271db4SGarrett Wollman xig.xig_count = n; 93798271db4SGarrett Wollman xig.xig_gen = gencnt; 93898271db4SGarrett Wollman xig.xig_sogen = so_gencnt; 93998271db4SGarrett Wollman error = SYSCTL_OUT(req, &xig, sizeof xig); 94098271db4SGarrett Wollman if (error) 9410ae76120SRobert Watson return (error); 94298271db4SGarrett Wollman 943a163d034SWarner Losh inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 94498271db4SGarrett Wollman if (inp_list == 0) 9450ae76120SRobert Watson return (ENOMEM); 94698271db4SGarrett Wollman 947603724d3SBjoern A. Zeeb INP_INFO_RLOCK(&V_ripcbinfo); 948603724d3SBjoern A. Zeeb for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n; 949fc2ffbe6SPoul-Henning Kamp inp = LIST_NEXT(inp, inp_list)) { 9509ad11dd8SRobert Watson INP_RLOCK(inp); 951f34f3a70SSam Leffler if (inp->inp_gencnt <= gencnt && 952f08ef6c5SBjoern A. Zeeb cr_canseeinpcb(req->td->td_ucred, inp) == 0) { 9533b6dd5a9SSam Leffler /* XXX held references? */ 95498271db4SGarrett Wollman inp_list[i++] = inp; 95598271db4SGarrett Wollman } 9569ad11dd8SRobert Watson INP_RUNLOCK(inp); 9574787fd37SPaul Saab } 958603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_ripcbinfo); 95998271db4SGarrett Wollman n = i; 96098271db4SGarrett Wollman 96198271db4SGarrett Wollman error = 0; 96298271db4SGarrett Wollman for (i = 0; i < n; i++) { 96398271db4SGarrett Wollman inp = inp_list[i]; 9649ad11dd8SRobert Watson INP_RLOCK(inp); 96598271db4SGarrett Wollman if (inp->inp_gencnt <= gencnt) { 96698271db4SGarrett Wollman struct xinpcb xi; 9673bb87a6cSKip Macy 968fd94099eSColin Percival bzero(&xi, sizeof(xi)); 96998271db4SGarrett Wollman xi.xi_len = sizeof xi; 97098271db4SGarrett Wollman /* XXX should avoid extra copy */ 97198271db4SGarrett Wollman bcopy(inp, &xi.xi_inp, sizeof *inp); 97298271db4SGarrett Wollman if (inp->inp_socket) 97398271db4SGarrett Wollman sotoxsocket(inp->inp_socket, &xi.xi_socket); 9749ad11dd8SRobert Watson INP_RUNLOCK(inp); 97598271db4SGarrett Wollman error = SYSCTL_OUT(req, &xi, sizeof xi); 976d915b280SStephan Uphoff } else 9779ad11dd8SRobert Watson INP_RUNLOCK(inp); 97898271db4SGarrett Wollman } 97998271db4SGarrett Wollman if (!error) { 98098271db4SGarrett Wollman /* 9810ae76120SRobert Watson * Give the user an updated idea of our state. If the 9820ae76120SRobert Watson * generation differs from what we told her before, she knows 9830ae76120SRobert Watson * that something happened while we were processing this 9840ae76120SRobert Watson * request, and it might be necessary to retry. 98598271db4SGarrett Wollman */ 986603724d3SBjoern A. Zeeb INP_INFO_RLOCK(&V_ripcbinfo); 987603724d3SBjoern A. Zeeb xig.xig_gen = V_ripcbinfo.ipi_gencnt; 98898271db4SGarrett Wollman xig.xig_sogen = so_gencnt; 989603724d3SBjoern A. Zeeb xig.xig_count = V_ripcbinfo.ipi_count; 990603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_ripcbinfo); 99198271db4SGarrett Wollman error = SYSCTL_OUT(req, &xig, sizeof xig); 99298271db4SGarrett Wollman } 99398271db4SGarrett Wollman free(inp_list, M_TEMP); 9940ae76120SRobert Watson return (error); 99598271db4SGarrett Wollman } 99698271db4SGarrett Wollman 99798271db4SGarrett Wollman SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0, 99898271db4SGarrett Wollman rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); 99998271db4SGarrett Wollman 1000117bcae7SGarrett Wollman struct pr_usrreqs rip_usrreqs = { 1001756d52a1SPoul-Henning Kamp .pru_abort = rip_abort, 1002756d52a1SPoul-Henning Kamp .pru_attach = rip_attach, 1003756d52a1SPoul-Henning Kamp .pru_bind = rip_bind, 1004756d52a1SPoul-Henning Kamp .pru_connect = rip_connect, 1005756d52a1SPoul-Henning Kamp .pru_control = in_control, 1006756d52a1SPoul-Henning Kamp .pru_detach = rip_detach, 1007756d52a1SPoul-Henning Kamp .pru_disconnect = rip_disconnect, 100854d642bbSRobert Watson .pru_peeraddr = in_getpeeraddr, 1009756d52a1SPoul-Henning Kamp .pru_send = rip_send, 1010756d52a1SPoul-Henning Kamp .pru_shutdown = rip_shutdown, 101154d642bbSRobert Watson .pru_sockaddr = in_getsockaddr, 1012a152f8a3SRobert Watson .pru_sosetlabel = in_pcbsosetlabel, 1013a152f8a3SRobert Watson .pru_close = rip_close, 1014117bcae7SGarrett Wollman }; 1015