1c398230bSWarner Losh /*- 2ea80aed1SGarrett Wollman * Copyright 1994, 1995 Massachusetts Institute of Technology 3d4a8d8f2SGarrett Wollman * 4ea80aed1SGarrett Wollman * Permission to use, copy, modify, and distribute this software and 5ea80aed1SGarrett Wollman * its documentation for any purpose and without fee is hereby 6ea80aed1SGarrett Wollman * granted, provided that both the above copyright notice and this 7ea80aed1SGarrett Wollman * permission notice appear in all copies, that both the above 8ea80aed1SGarrett Wollman * copyright notice and this permission notice appear in all 9ea80aed1SGarrett Wollman * supporting documentation, and that the name of M.I.T. not be used 10ea80aed1SGarrett Wollman * in advertising or publicity pertaining to distribution of the 11ea80aed1SGarrett Wollman * software without specific, written prior permission. M.I.T. makes 12ea80aed1SGarrett Wollman * no representations about the suitability of this software for any 13ea80aed1SGarrett Wollman * purpose. It is provided "as is" without express or implied 14ea80aed1SGarrett Wollman * warranty. 15d4a8d8f2SGarrett Wollman * 16ea80aed1SGarrett Wollman * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 17ea80aed1SGarrett Wollman * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 18ea80aed1SGarrett Wollman * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 19ea80aed1SGarrett Wollman * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 20ea80aed1SGarrett Wollman * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21ea80aed1SGarrett Wollman * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22ea80aed1SGarrett Wollman * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23ea80aed1SGarrett Wollman * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24ea80aed1SGarrett Wollman * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25ea80aed1SGarrett Wollman * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26ea80aed1SGarrett Wollman * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27ea80aed1SGarrett Wollman * SUCH DAMAGE. 28d4a8d8f2SGarrett Wollman */ 29d4a8d8f2SGarrett Wollman 304b421e2dSMike Silbersack #include <sys/cdefs.h> 314b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 324b421e2dSMike Silbersack 334684d3cbSAlexander V. Chernikov #include "opt_mpath.h" 344684d3cbSAlexander V. Chernikov 35d4a8d8f2SGarrett Wollman #include <sys/param.h> 36d4a8d8f2SGarrett Wollman #include <sys/systm.h> 37d4a8d8f2SGarrett Wollman #include <sys/kernel.h> 3898163b98SPoul-Henning Kamp #include <sys/sysctl.h> 39d4a8d8f2SGarrett Wollman #include <sys/socket.h> 40d4a8d8f2SGarrett Wollman #include <sys/mbuf.h> 41d4a8d8f2SGarrett Wollman 42d4a8d8f2SGarrett Wollman #include <net/if.h> 4376039bc8SGleb Smirnoff #include <net/if_var.h> 44d4a8d8f2SGarrett Wollman #include <net/route.h> 4561eee0e2SAlexander V. Chernikov #include <net/route_var.h> 46*a6663252SAlexander V. Chernikov #include <net/route/nhop.h> 47*a6663252SAlexander V. Chernikov #include <net/route/shared.h> 48530c0060SRobert Watson #include <net/vnet.h> 494b79449eSBjoern A. Zeeb 50d4a8d8f2SGarrett Wollman #include <netinet/in.h> 51d4a8d8f2SGarrett Wollman #include <netinet/in_var.h> 523c2824b9SAlexander V. Chernikov #include <netinet/ip.h> 533c2824b9SAlexander V. Chernikov #include <netinet/ip_icmp.h> 541e3d5af0SRuslan Ermilov #include <netinet/ip_var.h> 55d4a8d8f2SGarrett Wollman 56ead85fe4SAlexander V. Chernikov extern int in_inithead(void **head, int off, u_int fibnum); 57bc29160dSMarko Zec #ifdef VIMAGE 58bc29160dSMarko Zec extern int in_detachhead(void **head, int off); 59bc29160dSMarko Zec #endif 60ce7609a4SBruce Evans 61*a6663252SAlexander V. Chernikov static int 62*a6663252SAlexander V. Chernikov rib4_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask, 63*a6663252SAlexander V. Chernikov struct nhop_object *nh) 64*a6663252SAlexander V. Chernikov { 65*a6663252SAlexander V. Chernikov const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr; 66*a6663252SAlexander V. Chernikov uint16_t nh_type; 67*a6663252SAlexander V. Chernikov int rt_flags; 68*a6663252SAlexander V. Chernikov 69*a6663252SAlexander V. Chernikov /* XXX: RTF_LOCAL && RTF_MULTICAST */ 70*a6663252SAlexander V. Chernikov 71*a6663252SAlexander V. Chernikov rt_flags = nhop_get_rtflags(nh); 72*a6663252SAlexander V. Chernikov 73*a6663252SAlexander V. Chernikov if (rt_flags & RTF_HOST) { 74*a6663252SAlexander V. Chernikov 75*a6663252SAlexander V. Chernikov /* 76*a6663252SAlexander V. Chernikov * Backward compatibility: 77*a6663252SAlexander V. Chernikov * if the destination is broadcast, 78*a6663252SAlexander V. Chernikov * mark route as broadcast. 79*a6663252SAlexander V. Chernikov * This behavior was useful when route cloning 80*a6663252SAlexander V. Chernikov * was in place, so there was an explicit cloned 81*a6663252SAlexander V. Chernikov * route for every broadcasted address. 82*a6663252SAlexander V. Chernikov * Currently (2020-04) there is no kernel machinery 83*a6663252SAlexander V. Chernikov * to do route cloning, though someone might explicitly 84*a6663252SAlexander V. Chernikov * add these routes to support some cases with active-active 85*a6663252SAlexander V. Chernikov * load balancing. Given that, retain this support. 86*a6663252SAlexander V. Chernikov */ 87*a6663252SAlexander V. Chernikov if (in_broadcast(addr4->sin_addr, nh->nh_ifp)) { 88*a6663252SAlexander V. Chernikov rt_flags |= RTF_BROADCAST; 89*a6663252SAlexander V. Chernikov nhop_set_rtflags(nh, rt_flags); 90*a6663252SAlexander V. Chernikov nh->nh_flags |= NHF_BROADCAST; 91*a6663252SAlexander V. Chernikov } 92*a6663252SAlexander V. Chernikov } 93*a6663252SAlexander V. Chernikov 94*a6663252SAlexander V. Chernikov /* 95*a6663252SAlexander V. Chernikov * Check route MTU: 96*a6663252SAlexander V. Chernikov * inherit interface MTU if not set or 97*a6663252SAlexander V. Chernikov * check if MTU is too large. 98*a6663252SAlexander V. Chernikov */ 99*a6663252SAlexander V. Chernikov if (nh->nh_mtu == 0) { 100*a6663252SAlexander V. Chernikov nh->nh_mtu = nh->nh_ifp->if_mtu; 101*a6663252SAlexander V. Chernikov } else if (nh->nh_mtu > nh->nh_ifp->if_mtu) 102*a6663252SAlexander V. Chernikov nh->nh_mtu = nh->nh_ifp->if_mtu; 103*a6663252SAlexander V. Chernikov 104*a6663252SAlexander V. Chernikov /* Ensure that default route nhop has special flag */ 105*a6663252SAlexander V. Chernikov const struct sockaddr_in *mask4 = (const struct sockaddr_in *)mask; 106*a6663252SAlexander V. Chernikov if ((rt_flags & RTF_HOST) == 0 && mask4->sin_addr.s_addr == 0) 107*a6663252SAlexander V. Chernikov nh->nh_flags |= NHF_DEFAULT; 108*a6663252SAlexander V. Chernikov 109*a6663252SAlexander V. Chernikov /* Set nhop type to basic per-AF nhop */ 110*a6663252SAlexander V. Chernikov if (nhop_get_type(nh) == 0) { 111*a6663252SAlexander V. Chernikov if (nh->nh_flags & NHF_GATEWAY) 112*a6663252SAlexander V. Chernikov nh_type = NH_TYPE_IPV4_ETHER_NHOP; 113*a6663252SAlexander V. Chernikov else 114*a6663252SAlexander V. Chernikov nh_type = NH_TYPE_IPV4_ETHER_RSLV; 115*a6663252SAlexander V. Chernikov 116*a6663252SAlexander V. Chernikov nhop_set_type(nh, nh_type); 117*a6663252SAlexander V. Chernikov } 118*a6663252SAlexander V. Chernikov 119*a6663252SAlexander V. Chernikov return (0); 120*a6663252SAlexander V. Chernikov } 121*a6663252SAlexander V. Chernikov 122d4a8d8f2SGarrett Wollman /* 123d4a8d8f2SGarrett Wollman * Do what we need to do when inserting a route. 124d4a8d8f2SGarrett Wollman */ 125d4a8d8f2SGarrett Wollman static struct radix_node * 12661eee0e2SAlexander V. Chernikov in_addroute(void *v_arg, void *n_arg, struct radix_head *head, 127d4a8d8f2SGarrett Wollman struct radix_node *treenodes) 128d4a8d8f2SGarrett Wollman { 129d4a8d8f2SGarrett Wollman struct rtentry *rt = (struct rtentry *)treenodes; 130dd224982SGarrett Wollman struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt); 131d4a8d8f2SGarrett Wollman 132d4a8d8f2SGarrett Wollman /* 1339f9b3dc4SGarrett Wollman * A little bit of help for both IP output and input: 1349f9b3dc4SGarrett Wollman * For host routes, we make sure that RTF_BROADCAST 1359f9b3dc4SGarrett Wollman * is set for anything that looks like a broadcast address. 1369f9b3dc4SGarrett Wollman * This way, we can avoid an expensive call to in_broadcast() 1379f9b3dc4SGarrett Wollman * in ip_output() most of the time (because the route passed 1389f9b3dc4SGarrett Wollman * to ip_output() is almost always a host route). 1399f9b3dc4SGarrett Wollman * 1409f9b3dc4SGarrett Wollman * We also do the same for local addresses, with the thought 1419f9b3dc4SGarrett Wollman * that this might one day be used to speed up ip_input(). 1429f9b3dc4SGarrett Wollman * 1439f9b3dc4SGarrett Wollman * We also mark routes to multicast addresses as such, because 1449f9b3dc4SGarrett Wollman * it's easy to do and might be useful (but this is much more 14526d02ca7SAndre Oppermann * dubious since it's so easy to inspect the address). 1469f9b3dc4SGarrett Wollman */ 1479f9b3dc4SGarrett Wollman if (rt->rt_flags & RTF_HOST) { 148b8a6e03fSGleb Smirnoff struct epoch_tracker et; 149b8a6e03fSGleb Smirnoff bool bcast; 150b8a6e03fSGleb Smirnoff 151b8a6e03fSGleb Smirnoff NET_EPOCH_ENTER(et); 152b8a6e03fSGleb Smirnoff bcast = in_broadcast(sin->sin_addr, rt->rt_ifp); 153b8a6e03fSGleb Smirnoff NET_EPOCH_EXIT(et); 154b8a6e03fSGleb Smirnoff if (bcast) 1559f9b3dc4SGarrett Wollman rt->rt_flags |= RTF_BROADCAST; 156b8a6e03fSGleb Smirnoff else if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr == 157b8a6e03fSGleb Smirnoff sin->sin_addr.s_addr) 1589f9b3dc4SGarrett Wollman rt->rt_flags |= RTF_LOCAL; 1599f9b3dc4SGarrett Wollman } 16026d02ca7SAndre Oppermann if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 16126d02ca7SAndre Oppermann rt->rt_flags |= RTF_MULTICAST; 162d4a8d8f2SGarrett Wollman 1631a75e3b2SAlexander V. Chernikov if (rt->rt_ifp != NULL) { 1641a75e3b2SAlexander V. Chernikov 1651a75e3b2SAlexander V. Chernikov /* 1661a75e3b2SAlexander V. Chernikov * Check route MTU: 1671a75e3b2SAlexander V. Chernikov * inherit interface MTU if not set or 1681a75e3b2SAlexander V. Chernikov * check if MTU is too large. 1691a75e3b2SAlexander V. Chernikov */ 1701a75e3b2SAlexander V. Chernikov if (rt->rt_mtu == 0) { 171e3a7aa6fSGleb Smirnoff rt->rt_mtu = rt->rt_ifp->if_mtu; 1721a75e3b2SAlexander V. Chernikov } else if (rt->rt_mtu > rt->rt_ifp->if_mtu) 1731a75e3b2SAlexander V. Chernikov rt->rt_mtu = rt->rt_ifp->if_mtu; 1741a75e3b2SAlexander V. Chernikov } 175dd224982SGarrett Wollman 1766e6b3f7cSQing Li return (rn_addroute(v_arg, n_arg, head, treenodes)); 177d4a8d8f2SGarrett Wollman } 178d4a8d8f2SGarrett Wollman 1798b07e49aSJulian Elischer static int _in_rt_was_here; 180d4a8d8f2SGarrett Wollman /* 181d4a8d8f2SGarrett Wollman * Initialize our routing tree. 182d4a8d8f2SGarrett Wollman */ 183d4a8d8f2SGarrett Wollman int 184ead85fe4SAlexander V. Chernikov in_inithead(void **head, int off, u_int fibnum) 185d4a8d8f2SGarrett Wollman { 18661eee0e2SAlexander V. Chernikov struct rib_head *rh; 187d4a8d8f2SGarrett Wollman 188ead85fe4SAlexander V. Chernikov rh = rt_table_init(32, AF_INET, fibnum); 18961eee0e2SAlexander V. Chernikov if (rh == NULL) 19061eee0e2SAlexander V. Chernikov return (0); 191d4a8d8f2SGarrett Wollman 192*a6663252SAlexander V. Chernikov rh->rnh_preadd = rib4_preadd; 19361eee0e2SAlexander V. Chernikov rh->rnh_addaddr = in_addroute; 1944684d3cbSAlexander V. Chernikov #ifdef RADIX_MPATH 1954684d3cbSAlexander V. Chernikov rt_mpath_init_rnh(rh); 1964684d3cbSAlexander V. Chernikov #endif 19761eee0e2SAlexander V. Chernikov *head = (void *)rh; 19831f0d081SAlexander V. Chernikov 1998b07e49aSJulian Elischer if (_in_rt_was_here == 0 ) { 2008b07e49aSJulian Elischer _in_rt_was_here = 1; 2018b07e49aSJulian Elischer } 202d4a8d8f2SGarrett Wollman return 1; 203d4a8d8f2SGarrett Wollman } 204d4a8d8f2SGarrett Wollman 205bc29160dSMarko Zec #ifdef VIMAGE 206bc29160dSMarko Zec int 207bc29160dSMarko Zec in_detachhead(void **head, int off) 208bc29160dSMarko Zec { 209bc29160dSMarko Zec 210a5243af2SBjoern A. Zeeb rt_table_destroy((struct rib_head *)(*head)); 211a5243af2SBjoern A. Zeeb return (1); 212bc29160dSMarko Zec } 213bc29160dSMarko Zec #endif 214bc29160dSMarko Zec 21539191c8eSGarrett Wollman /* 21691854268SRuslan Ermilov * This zaps old routes when the interface goes down or interface 21791854268SRuslan Ermilov * address is deleted. In the latter case, it deletes static routes 21891854268SRuslan Ermilov * that point to this address. If we don't do this, we may end up 21991854268SRuslan Ermilov * using the old address in the future. The ones we always want to 22091854268SRuslan Ermilov * get rid of are things like ARP entries, since the user might down 22191854268SRuslan Ermilov * the interface, walk over to a completely different network, and 22291854268SRuslan Ermilov * plug back in. 22339191c8eSGarrett Wollman */ 22439191c8eSGarrett Wollman struct in_ifadown_arg { 22539191c8eSGarrett Wollman struct ifaddr *ifa; 22691854268SRuslan Ermilov int del; 22739191c8eSGarrett Wollman }; 22839191c8eSGarrett Wollman 22939191c8eSGarrett Wollman static int 230e8b0643eSAlexander V. Chernikov in_ifadownkill(const struct rtentry *rt, void *xap) 23139191c8eSGarrett Wollman { 23239191c8eSGarrett Wollman struct in_ifadown_arg *ap = xap; 23339191c8eSGarrett Wollman 234e8b0643eSAlexander V. Chernikov if (rt->rt_ifa != ap->ifa) 2354579930dSBjoern A. Zeeb return (0); 23639191c8eSGarrett Wollman 237e8b0643eSAlexander V. Chernikov if ((rt->rt_flags & RTF_STATIC) != 0 && ap->del == 0) 238e8b0643eSAlexander V. Chernikov return (0); 2394bdf0b6aSAlexander V. Chernikov 240e8b0643eSAlexander V. Chernikov return (1); 2414bdf0b6aSAlexander V. Chernikov } 2424bdf0b6aSAlexander V. Chernikov 243586904c2SGleb Smirnoff void 24491854268SRuslan Ermilov in_ifadown(struct ifaddr *ifa, int delete) 24539191c8eSGarrett Wollman { 24639191c8eSGarrett Wollman struct in_ifadown_arg arg; 24739191c8eSGarrett Wollman 248586904c2SGleb Smirnoff KASSERT(ifa->ifa_addr->sa_family == AF_INET, 249586904c2SGleb Smirnoff ("%s: wrong family", __func__)); 25039191c8eSGarrett Wollman 25139191c8eSGarrett Wollman arg.ifa = ifa; 25291854268SRuslan Ermilov arg.del = delete; 2534bdf0b6aSAlexander V. Chernikov 254e8b0643eSAlexander V. Chernikov rt_foreach_fib_walk_del(AF_INET, in_ifadownkill, &arg); 255d1dd20beSSam Leffler ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */ 2568b07e49aSJulian Elischer } 2578b07e49aSJulian Elischer 2588b07e49aSJulian Elischer /* 2598b07e49aSJulian Elischer * inet versions of rt functions. These have fib extensions and 2608b07e49aSJulian Elischer * for now will just reference the _fib variants. 2618b07e49aSJulian Elischer * eventually this order will be reversed, 2628b07e49aSJulian Elischer */ 2638b07e49aSJulian Elischer void 2648b07e49aSJulian Elischer in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum) 2658b07e49aSJulian Elischer { 2668b07e49aSJulian Elischer rtalloc_ign_fib(ro, ignflags, fibnum); 2678b07e49aSJulian Elischer } 2688b07e49aSJulian Elischer 269