11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * ROUTE - implementation of the IP router. 71da177e4SLinus Torvalds * 802c30a84SJesper Juhl * Authors: Ross Biro 91da177e4SLinus Torvalds * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 101da177e4SLinus Torvalds * Alan Cox, <gw4pts@gw4pts.ampr.org> 111da177e4SLinus Torvalds * Linus Torvalds, <Linus.Torvalds@helsinki.fi> 121da177e4SLinus Torvalds * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 131da177e4SLinus Torvalds * 141da177e4SLinus Torvalds * Fixes: 151da177e4SLinus Torvalds * Alan Cox : Verify area fixes. 161da177e4SLinus Torvalds * Alan Cox : cli() protects routing changes 171da177e4SLinus Torvalds * Rui Oliveira : ICMP routing table updates 181da177e4SLinus Torvalds * (rco@di.uminho.pt) Routing table insertion and update 191da177e4SLinus Torvalds * Linus Torvalds : Rewrote bits to be sensible 201da177e4SLinus Torvalds * Alan Cox : Added BSD route gw semantics 211da177e4SLinus Torvalds * Alan Cox : Super /proc >4K 221da177e4SLinus Torvalds * Alan Cox : MTU in route table 231da177e4SLinus Torvalds * Alan Cox : MSS actually. Also added the window 241da177e4SLinus Torvalds * clamper. 251da177e4SLinus Torvalds * Sam Lantinga : Fixed route matching in rt_del() 261da177e4SLinus Torvalds * Alan Cox : Routing cache support. 271da177e4SLinus Torvalds * Alan Cox : Removed compatibility cruft. 281da177e4SLinus Torvalds * Alan Cox : RTF_REJECT support. 291da177e4SLinus Torvalds * Alan Cox : TCP irtt support. 301da177e4SLinus Torvalds * Jonathan Naylor : Added Metric support. 311da177e4SLinus Torvalds * Miquel van Smoorenburg : BSD API fixes. 321da177e4SLinus Torvalds * Miquel van Smoorenburg : Metrics. 331da177e4SLinus Torvalds * Alan Cox : Use __u32 properly 341da177e4SLinus Torvalds * Alan Cox : Aligned routing errors more closely with BSD 351da177e4SLinus Torvalds * our system is still very different. 361da177e4SLinus Torvalds * Alan Cox : Faster /proc handling 371da177e4SLinus Torvalds * Alexey Kuznetsov : Massive rework to support tree based routing, 381da177e4SLinus Torvalds * routing caches and better behaviour. 391da177e4SLinus Torvalds * 401da177e4SLinus Torvalds * Olaf Erb : irtt wasn't being copied right. 411da177e4SLinus Torvalds * Bjorn Ekwall : Kerneld route support. 421da177e4SLinus Torvalds * Alan Cox : Multicast fixed (I hope) 431da177e4SLinus Torvalds * Pavel Krauz : Limited broadcast fixed 441da177e4SLinus Torvalds * Mike McLagan : Routing by source 451da177e4SLinus Torvalds * Alexey Kuznetsov : End of old history. Split to fib.c and 461da177e4SLinus Torvalds * route.c and rewritten from scratch. 471da177e4SLinus Torvalds * Andi Kleen : Load-limit warning messages. 481da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year coma. 491da177e4SLinus Torvalds * Vitaly E. Lavrov : Race condition in ip_route_input_slow. 501da177e4SLinus Torvalds * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow. 511da177e4SLinus Torvalds * Vladimir V. Ivanov : IP rule info (flowid) is really useful. 521da177e4SLinus Torvalds * Marc Boucher : routing by fwmark 531da177e4SLinus Torvalds * Robert Olsson : Added rt_cache statistics 541da177e4SLinus Torvalds * Arnaldo C. Melo : Convert proc stuff to seq_file 55bb1d23b0SEric Dumazet * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. 56cef2685eSIlia Sotnikov * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect 57cef2685eSIlia Sotnikov * Ilia Sotnikov : Removed TOS from hash calculations 581da177e4SLinus Torvalds * 591da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 601da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 611da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 621da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 631da177e4SLinus Torvalds */ 641da177e4SLinus Torvalds 65afd46503SJoe Perches #define pr_fmt(fmt) "IPv4: " fmt 66afd46503SJoe Perches 671da177e4SLinus Torvalds #include <linux/module.h> 687c0f6ba6SLinus Torvalds #include <linux/uaccess.h> 691da177e4SLinus Torvalds #include <linux/bitops.h> 701da177e4SLinus Torvalds #include <linux/types.h> 711da177e4SLinus Torvalds #include <linux/kernel.h> 721da177e4SLinus Torvalds #include <linux/mm.h> 731da177e4SLinus Torvalds #include <linux/string.h> 741da177e4SLinus Torvalds #include <linux/socket.h> 751da177e4SLinus Torvalds #include <linux/sockios.h> 761da177e4SLinus Torvalds #include <linux/errno.h> 771da177e4SLinus Torvalds #include <linux/in.h> 781da177e4SLinus Torvalds #include <linux/inet.h> 791da177e4SLinus Torvalds #include <linux/netdevice.h> 801da177e4SLinus Torvalds #include <linux/proc_fs.h> 811da177e4SLinus Torvalds #include <linux/init.h> 821da177e4SLinus Torvalds #include <linux/skbuff.h> 831da177e4SLinus Torvalds #include <linux/inetdevice.h> 841da177e4SLinus Torvalds #include <linux/igmp.h> 851da177e4SLinus Torvalds #include <linux/pkt_sched.h> 861da177e4SLinus Torvalds #include <linux/mroute.h> 871da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h> 881da177e4SLinus Torvalds #include <linux/random.h> 891da177e4SLinus Torvalds #include <linux/rcupdate.h> 901da177e4SLinus Torvalds #include <linux/times.h> 915a0e3ad6STejun Heo #include <linux/slab.h> 9273f156a6SEric Dumazet #include <linux/jhash.h> 93352e512cSHerbert Xu #include <net/dst.h> 941b7179d3SThomas Graf #include <net/dst_metadata.h> 95457c4cbcSEric W. Biederman #include <net/net_namespace.h> 961da177e4SLinus Torvalds #include <net/protocol.h> 971da177e4SLinus Torvalds #include <net/ip.h> 981da177e4SLinus Torvalds #include <net/route.h> 991da177e4SLinus Torvalds #include <net/inetpeer.h> 1001da177e4SLinus Torvalds #include <net/sock.h> 1011da177e4SLinus Torvalds #include <net/ip_fib.h> 1021da177e4SLinus Torvalds #include <net/arp.h> 1031da177e4SLinus Torvalds #include <net/tcp.h> 1041da177e4SLinus Torvalds #include <net/icmp.h> 1051da177e4SLinus Torvalds #include <net/xfrm.h> 106571e7226SRoopa Prabhu #include <net/lwtunnel.h> 1078d71740cSTom Tucker #include <net/netevent.h> 10863f3444fSThomas Graf #include <net/rtnetlink.h> 1091da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL 1101da177e4SLinus Torvalds #include <linux/sysctl.h> 1111da177e4SLinus Torvalds #endif 1126e5714eaSDavid S. Miller #include <net/secure_seq.h> 1131b7179d3SThomas Graf #include <net/ip_tunnels.h> 114385add90SDavid Ahern #include <net/l3mdev.h> 1151da177e4SLinus Torvalds 116b6179813SRoopa Prabhu #include "fib_lookup.h" 117b6179813SRoopa Prabhu 11868a5e3ddSDavid S. Miller #define RT_FL_TOS(oldflp4) \ 119f61759e6SJulian Anastasov ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) 1201da177e4SLinus Torvalds 1211da177e4SLinus Torvalds #define RT_GC_TIMEOUT (300*HZ) 1221da177e4SLinus Torvalds 1231da177e4SLinus Torvalds static int ip_rt_max_size; 124817bc4dbSStephen Hemminger static int ip_rt_redirect_number __read_mostly = 9; 125817bc4dbSStephen Hemminger static int ip_rt_redirect_load __read_mostly = HZ / 50; 126817bc4dbSStephen Hemminger static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); 127817bc4dbSStephen Hemminger static int ip_rt_error_cost __read_mostly = HZ; 128817bc4dbSStephen Hemminger static int ip_rt_error_burst __read_mostly = 5 * HZ; 129817bc4dbSStephen Hemminger static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; 130c7272c2fSSabrina Dubroca static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; 131817bc4dbSStephen Hemminger static int ip_rt_min_advmss __read_mostly = 256; 1329f28a2fcSEric Dumazet 133deed49dfSXin Long static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; 134c7272c2fSSabrina Dubroca 1351da177e4SLinus Torvalds /* 1361da177e4SLinus Torvalds * Interface to generic destination cache. 1371da177e4SLinus Torvalds */ 1381da177e4SLinus Torvalds 1391da177e4SLinus Torvalds static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); 1400dbaee3bSDavid S. Miller static unsigned int ipv4_default_advmss(const struct dst_entry *dst); 141ebb762f2SSteffen Klassert static unsigned int ipv4_mtu(const struct dst_entry *dst); 1421da177e4SLinus Torvalds static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); 1431da177e4SLinus Torvalds static void ipv4_link_failure(struct sk_buff *skb); 1446700c270SDavid S. Miller static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 1456700c270SDavid S. Miller struct sk_buff *skb, u32 mtu); 1466700c270SDavid S. Miller static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, 1476700c270SDavid S. Miller struct sk_buff *skb); 148caacf05eSDavid S. Miller static void ipv4_dst_destroy(struct dst_entry *dst); 1491da177e4SLinus Torvalds 15062fa8a84SDavid S. Miller static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) 15162fa8a84SDavid S. Miller { 15231248731SDavid S. Miller WARN_ON(1); 15331248731SDavid S. Miller return NULL; 15462fa8a84SDavid S. Miller } 15562fa8a84SDavid S. Miller 156f894cbf8SDavid S. Miller static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, 157f894cbf8SDavid S. Miller struct sk_buff *skb, 158f894cbf8SDavid S. Miller const void *daddr); 15963fca65dSJulian Anastasov static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr); 160d3aaeb38SDavid S. Miller 1611da177e4SLinus Torvalds static struct dst_ops ipv4_dst_ops = { 1621da177e4SLinus Torvalds .family = AF_INET, 1631da177e4SLinus Torvalds .check = ipv4_dst_check, 1640dbaee3bSDavid S. Miller .default_advmss = ipv4_default_advmss, 165ebb762f2SSteffen Klassert .mtu = ipv4_mtu, 16662fa8a84SDavid S. Miller .cow_metrics = ipv4_cow_metrics, 167caacf05eSDavid S. Miller .destroy = ipv4_dst_destroy, 1681da177e4SLinus Torvalds .negative_advice = ipv4_negative_advice, 1691da177e4SLinus Torvalds .link_failure = ipv4_link_failure, 1701da177e4SLinus Torvalds .update_pmtu = ip_rt_update_pmtu, 171e47a185bSDavid S. Miller .redirect = ip_do_redirect, 172b92dacd4SEric W. Biederman .local_out = __ip_local_out, 173d3aaeb38SDavid S. Miller .neigh_lookup = ipv4_neigh_lookup, 17463fca65dSJulian Anastasov .confirm_neigh = ipv4_confirm_neigh, 1751da177e4SLinus Torvalds }; 1761da177e4SLinus Torvalds 1771da177e4SLinus Torvalds #define ECN_OR_COST(class) TC_PRIO_##class 1781da177e4SLinus Torvalds 1794839c52bSPhilippe De Muyter const __u8 ip_tos2prio[16] = { 1801da177e4SLinus Torvalds TC_PRIO_BESTEFFORT, 1814a2b9c37SDan Siemon ECN_OR_COST(BESTEFFORT), 1821da177e4SLinus Torvalds TC_PRIO_BESTEFFORT, 1831da177e4SLinus Torvalds ECN_OR_COST(BESTEFFORT), 1841da177e4SLinus Torvalds TC_PRIO_BULK, 1851da177e4SLinus Torvalds ECN_OR_COST(BULK), 1861da177e4SLinus Torvalds TC_PRIO_BULK, 1871da177e4SLinus Torvalds ECN_OR_COST(BULK), 1881da177e4SLinus Torvalds TC_PRIO_INTERACTIVE, 1891da177e4SLinus Torvalds ECN_OR_COST(INTERACTIVE), 1901da177e4SLinus Torvalds TC_PRIO_INTERACTIVE, 1911da177e4SLinus Torvalds ECN_OR_COST(INTERACTIVE), 1921da177e4SLinus Torvalds TC_PRIO_INTERACTIVE_BULK, 1931da177e4SLinus Torvalds ECN_OR_COST(INTERACTIVE_BULK), 1941da177e4SLinus Torvalds TC_PRIO_INTERACTIVE_BULK, 1951da177e4SLinus Torvalds ECN_OR_COST(INTERACTIVE_BULK) 1961da177e4SLinus Torvalds }; 197d4a96865SAmir Vadai EXPORT_SYMBOL(ip_tos2prio); 1981da177e4SLinus Torvalds 1992f970d83SEric Dumazet static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 2003ed66e91SChristoph Lameter #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) 2011da177e4SLinus Torvalds 2021da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 2031da177e4SLinus Torvalds static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 2041da177e4SLinus Torvalds { 20529e75252SEric Dumazet if (*pos) 20689aef892SDavid S. Miller return NULL; 20729e75252SEric Dumazet return SEQ_START_TOKEN; 2081da177e4SLinus Torvalds } 2091da177e4SLinus Torvalds 2101da177e4SLinus Torvalds static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2111da177e4SLinus Torvalds { 2121da177e4SLinus Torvalds ++*pos; 21389aef892SDavid S. Miller return NULL; 2141da177e4SLinus Torvalds } 2151da177e4SLinus Torvalds 2161da177e4SLinus Torvalds static void rt_cache_seq_stop(struct seq_file *seq, void *v) 2171da177e4SLinus Torvalds { 2181da177e4SLinus Torvalds } 2191da177e4SLinus Torvalds 2201da177e4SLinus Torvalds static int rt_cache_seq_show(struct seq_file *seq, void *v) 2211da177e4SLinus Torvalds { 2221da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) 2231da177e4SLinus Torvalds seq_printf(seq, "%-127s\n", 2241da177e4SLinus Torvalds "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" 2251da177e4SLinus Torvalds "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" 2261da177e4SLinus Torvalds "HHUptod\tSpecDst"); 2271da177e4SLinus Torvalds return 0; 2281da177e4SLinus Torvalds } 2291da177e4SLinus Torvalds 230f690808eSStephen Hemminger static const struct seq_operations rt_cache_seq_ops = { 2311da177e4SLinus Torvalds .start = rt_cache_seq_start, 2321da177e4SLinus Torvalds .next = rt_cache_seq_next, 2331da177e4SLinus Torvalds .stop = rt_cache_seq_stop, 2341da177e4SLinus Torvalds .show = rt_cache_seq_show, 2351da177e4SLinus Torvalds }; 2361da177e4SLinus Torvalds 2371da177e4SLinus Torvalds static int rt_cache_seq_open(struct inode *inode, struct file *file) 2381da177e4SLinus Torvalds { 23989aef892SDavid S. Miller return seq_open(file, &rt_cache_seq_ops); 2401da177e4SLinus Torvalds } 2411da177e4SLinus Torvalds 2429a32144eSArjan van de Ven static const struct file_operations rt_cache_seq_fops = { 2431da177e4SLinus Torvalds .open = rt_cache_seq_open, 2441da177e4SLinus Torvalds .read = seq_read, 2451da177e4SLinus Torvalds .llseek = seq_lseek, 24689aef892SDavid S. Miller .release = seq_release, 2471da177e4SLinus Torvalds }; 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds 2501da177e4SLinus Torvalds static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) 2511da177e4SLinus Torvalds { 2521da177e4SLinus Torvalds int cpu; 2531da177e4SLinus Torvalds 2541da177e4SLinus Torvalds if (*pos == 0) 2551da177e4SLinus Torvalds return SEQ_START_TOKEN; 2561da177e4SLinus Torvalds 2570f23174aSRusty Russell for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { 2581da177e4SLinus Torvalds if (!cpu_possible(cpu)) 2591da177e4SLinus Torvalds continue; 2601da177e4SLinus Torvalds *pos = cpu+1; 2612f970d83SEric Dumazet return &per_cpu(rt_cache_stat, cpu); 2621da177e4SLinus Torvalds } 2631da177e4SLinus Torvalds return NULL; 2641da177e4SLinus Torvalds } 2651da177e4SLinus Torvalds 2661da177e4SLinus Torvalds static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2671da177e4SLinus Torvalds { 2681da177e4SLinus Torvalds int cpu; 2691da177e4SLinus Torvalds 2700f23174aSRusty Russell for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 2711da177e4SLinus Torvalds if (!cpu_possible(cpu)) 2721da177e4SLinus Torvalds continue; 2731da177e4SLinus Torvalds *pos = cpu+1; 2742f970d83SEric Dumazet return &per_cpu(rt_cache_stat, cpu); 2751da177e4SLinus Torvalds } 2761da177e4SLinus Torvalds return NULL; 2771da177e4SLinus Torvalds 2781da177e4SLinus Torvalds } 2791da177e4SLinus Torvalds 2801da177e4SLinus Torvalds static void rt_cpu_seq_stop(struct seq_file *seq, void *v) 2811da177e4SLinus Torvalds { 2821da177e4SLinus Torvalds 2831da177e4SLinus Torvalds } 2841da177e4SLinus Torvalds 2851da177e4SLinus Torvalds static int rt_cpu_seq_show(struct seq_file *seq, void *v) 2861da177e4SLinus Torvalds { 2871da177e4SLinus Torvalds struct rt_cache_stat *st = v; 2881da177e4SLinus Torvalds 2891da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 2905bec0039SOlaf Rempel seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); 2911da177e4SLinus Torvalds return 0; 2921da177e4SLinus Torvalds } 2931da177e4SLinus Torvalds 2941da177e4SLinus Torvalds seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " 2951da177e4SLinus Torvalds " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", 296fc66f95cSEric Dumazet dst_entries_get_slow(&ipv4_dst_ops), 2970baf2b35SEric Dumazet 0, /* st->in_hit */ 2981da177e4SLinus Torvalds st->in_slow_tot, 2991da177e4SLinus Torvalds st->in_slow_mc, 3001da177e4SLinus Torvalds st->in_no_route, 3011da177e4SLinus Torvalds st->in_brd, 3021da177e4SLinus Torvalds st->in_martian_dst, 3031da177e4SLinus Torvalds st->in_martian_src, 3041da177e4SLinus Torvalds 3050baf2b35SEric Dumazet 0, /* st->out_hit */ 3061da177e4SLinus Torvalds st->out_slow_tot, 3071da177e4SLinus Torvalds st->out_slow_mc, 3081da177e4SLinus Torvalds 3090baf2b35SEric Dumazet 0, /* st->gc_total */ 3100baf2b35SEric Dumazet 0, /* st->gc_ignored */ 3110baf2b35SEric Dumazet 0, /* st->gc_goal_miss */ 3120baf2b35SEric Dumazet 0, /* st->gc_dst_overflow */ 3130baf2b35SEric Dumazet 0, /* st->in_hlist_search */ 3140baf2b35SEric Dumazet 0 /* st->out_hlist_search */ 3151da177e4SLinus Torvalds ); 3161da177e4SLinus Torvalds return 0; 3171da177e4SLinus Torvalds } 3181da177e4SLinus Torvalds 319f690808eSStephen Hemminger static const struct seq_operations rt_cpu_seq_ops = { 3201da177e4SLinus Torvalds .start = rt_cpu_seq_start, 3211da177e4SLinus Torvalds .next = rt_cpu_seq_next, 3221da177e4SLinus Torvalds .stop = rt_cpu_seq_stop, 3231da177e4SLinus Torvalds .show = rt_cpu_seq_show, 3241da177e4SLinus Torvalds }; 3251da177e4SLinus Torvalds 3261da177e4SLinus Torvalds 3271da177e4SLinus Torvalds static int rt_cpu_seq_open(struct inode *inode, struct file *file) 3281da177e4SLinus Torvalds { 3291da177e4SLinus Torvalds return seq_open(file, &rt_cpu_seq_ops); 3301da177e4SLinus Torvalds } 3311da177e4SLinus Torvalds 3329a32144eSArjan van de Ven static const struct file_operations rt_cpu_seq_fops = { 3331da177e4SLinus Torvalds .open = rt_cpu_seq_open, 3341da177e4SLinus Torvalds .read = seq_read, 3351da177e4SLinus Torvalds .llseek = seq_lseek, 3361da177e4SLinus Torvalds .release = seq_release, 3371da177e4SLinus Torvalds }; 3381da177e4SLinus Torvalds 339c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 340a661c419SAlexey Dobriyan static int rt_acct_proc_show(struct seq_file *m, void *v) 34178c686e9SPavel Emelyanov { 342a661c419SAlexey Dobriyan struct ip_rt_acct *dst, *src; 343a661c419SAlexey Dobriyan unsigned int i, j; 34478c686e9SPavel Emelyanov 345a661c419SAlexey Dobriyan dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL); 346a661c419SAlexey Dobriyan if (!dst) 347a661c419SAlexey Dobriyan return -ENOMEM; 34878c686e9SPavel Emelyanov 349a661c419SAlexey Dobriyan for_each_possible_cpu(i) { 350a661c419SAlexey Dobriyan src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); 351a661c419SAlexey Dobriyan for (j = 0; j < 256; j++) { 352a661c419SAlexey Dobriyan dst[j].o_bytes += src[j].o_bytes; 353a661c419SAlexey Dobriyan dst[j].o_packets += src[j].o_packets; 354a661c419SAlexey Dobriyan dst[j].i_bytes += src[j].i_bytes; 355a661c419SAlexey Dobriyan dst[j].i_packets += src[j].i_packets; 356a661c419SAlexey Dobriyan } 357a661c419SAlexey Dobriyan } 358a661c419SAlexey Dobriyan 359a661c419SAlexey Dobriyan seq_write(m, dst, 256 * sizeof(struct ip_rt_acct)); 360a661c419SAlexey Dobriyan kfree(dst); 36178c686e9SPavel Emelyanov return 0; 36278c686e9SPavel Emelyanov } 36378c686e9SPavel Emelyanov #endif 364107f1634SPavel Emelyanov 36573b38711SDenis V. Lunev static int __net_init ip_rt_do_proc_init(struct net *net) 366107f1634SPavel Emelyanov { 367107f1634SPavel Emelyanov struct proc_dir_entry *pde; 368107f1634SPavel Emelyanov 369d6444062SJoe Perches pde = proc_create("rt_cache", 0444, net->proc_net, 370107f1634SPavel Emelyanov &rt_cache_seq_fops); 371107f1634SPavel Emelyanov if (!pde) 372107f1634SPavel Emelyanov goto err1; 373107f1634SPavel Emelyanov 374d6444062SJoe Perches pde = proc_create("rt_cache", 0444, 37577020720SWang Chen net->proc_net_stat, &rt_cpu_seq_fops); 376107f1634SPavel Emelyanov if (!pde) 377107f1634SPavel Emelyanov goto err2; 378107f1634SPavel Emelyanov 379c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 3803f3942acSChristoph Hellwig pde = proc_create_single("rt_acct", 0, net->proc_net, 3813f3942acSChristoph Hellwig rt_acct_proc_show); 382107f1634SPavel Emelyanov if (!pde) 383107f1634SPavel Emelyanov goto err3; 384107f1634SPavel Emelyanov #endif 385107f1634SPavel Emelyanov return 0; 386107f1634SPavel Emelyanov 387c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 388107f1634SPavel Emelyanov err3: 389107f1634SPavel Emelyanov remove_proc_entry("rt_cache", net->proc_net_stat); 390107f1634SPavel Emelyanov #endif 391107f1634SPavel Emelyanov err2: 392107f1634SPavel Emelyanov remove_proc_entry("rt_cache", net->proc_net); 393107f1634SPavel Emelyanov err1: 394107f1634SPavel Emelyanov return -ENOMEM; 395107f1634SPavel Emelyanov } 39673b38711SDenis V. Lunev 39773b38711SDenis V. Lunev static void __net_exit ip_rt_do_proc_exit(struct net *net) 39873b38711SDenis V. Lunev { 39973b38711SDenis V. Lunev remove_proc_entry("rt_cache", net->proc_net_stat); 40073b38711SDenis V. Lunev remove_proc_entry("rt_cache", net->proc_net); 401c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 40273b38711SDenis V. Lunev remove_proc_entry("rt_acct", net->proc_net); 4030a931acfSAlexey Dobriyan #endif 40473b38711SDenis V. Lunev } 40573b38711SDenis V. Lunev 40673b38711SDenis V. Lunev static struct pernet_operations ip_rt_proc_ops __net_initdata = { 40773b38711SDenis V. Lunev .init = ip_rt_do_proc_init, 40873b38711SDenis V. Lunev .exit = ip_rt_do_proc_exit, 40973b38711SDenis V. Lunev }; 41073b38711SDenis V. Lunev 41173b38711SDenis V. Lunev static int __init ip_rt_proc_init(void) 41273b38711SDenis V. Lunev { 41373b38711SDenis V. Lunev return register_pernet_subsys(&ip_rt_proc_ops); 41473b38711SDenis V. Lunev } 41573b38711SDenis V. Lunev 416107f1634SPavel Emelyanov #else 41773b38711SDenis V. Lunev static inline int ip_rt_proc_init(void) 418107f1634SPavel Emelyanov { 419107f1634SPavel Emelyanov return 0; 420107f1634SPavel Emelyanov } 4211da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 4221da177e4SLinus Torvalds 4234331debcSEric Dumazet static inline bool rt_is_expired(const struct rtable *rth) 424e84f84f2SDenis V. Lunev { 425ca4c3fc2Sfan.du return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev)); 426e84f84f2SDenis V. Lunev } 427e84f84f2SDenis V. Lunev 4284ccfe6d4SNicolas Dichtel void rt_cache_flush(struct net *net) 42929e75252SEric Dumazet { 430ca4c3fc2Sfan.du rt_genid_bump_ipv4(net); 43198376387SEric Dumazet } 43298376387SEric Dumazet 433f894cbf8SDavid S. Miller static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, 434f894cbf8SDavid S. Miller struct sk_buff *skb, 435f894cbf8SDavid S. Miller const void *daddr) 4363769cffbSDavid Miller { 437d3aaeb38SDavid S. Miller struct net_device *dev = dst->dev; 438d3aaeb38SDavid S. Miller const __be32 *pkey = daddr; 43939232973SDavid S. Miller const struct rtable *rt; 4403769cffbSDavid Miller struct neighbour *n; 4413769cffbSDavid Miller 44239232973SDavid S. Miller rt = (const struct rtable *) dst; 443a263b309SDavid S. Miller if (rt->rt_gateway) 44439232973SDavid S. Miller pkey = (const __be32 *) &rt->rt_gateway; 445f894cbf8SDavid S. Miller else if (skb) 446f894cbf8SDavid S. Miller pkey = &ip_hdr(skb)->daddr; 447d3aaeb38SDavid S. Miller 44880703d26SDavid S. Miller n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); 449d3aaeb38SDavid S. Miller if (n) 450d3aaeb38SDavid S. Miller return n; 45132092ecfSDavid Miller return neigh_create(&arp_tbl, pkey, dev); 452d3aaeb38SDavid S. Miller } 453d3aaeb38SDavid S. Miller 45463fca65dSJulian Anastasov static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) 45563fca65dSJulian Anastasov { 45663fca65dSJulian Anastasov struct net_device *dev = dst->dev; 45763fca65dSJulian Anastasov const __be32 *pkey = daddr; 45863fca65dSJulian Anastasov const struct rtable *rt; 45963fca65dSJulian Anastasov 46063fca65dSJulian Anastasov rt = (const struct rtable *)dst; 46163fca65dSJulian Anastasov if (rt->rt_gateway) 46263fca65dSJulian Anastasov pkey = (const __be32 *)&rt->rt_gateway; 46363fca65dSJulian Anastasov else if (!daddr || 46463fca65dSJulian Anastasov (rt->rt_flags & 46563fca65dSJulian Anastasov (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) 46663fca65dSJulian Anastasov return; 46763fca65dSJulian Anastasov 46863fca65dSJulian Anastasov __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); 46963fca65dSJulian Anastasov } 47063fca65dSJulian Anastasov 47104ca6973SEric Dumazet #define IP_IDENTS_SZ 2048u 47204ca6973SEric Dumazet 473355b590cSEric Dumazet static atomic_t *ip_idents __read_mostly; 474355b590cSEric Dumazet static u32 *ip_tstamps __read_mostly; 47504ca6973SEric Dumazet 47604ca6973SEric Dumazet /* In order to protect privacy, we add a perturbation to identifiers 47704ca6973SEric Dumazet * if one generator is seldom used. This makes hard for an attacker 47804ca6973SEric Dumazet * to infer how many packets were sent between two points in time. 47904ca6973SEric Dumazet */ 48004ca6973SEric Dumazet u32 ip_idents_reserve(u32 hash, int segs) 48104ca6973SEric Dumazet { 482355b590cSEric Dumazet u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ; 483355b590cSEric Dumazet atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; 4846aa7de05SMark Rutland u32 old = READ_ONCE(*p_tstamp); 48504ca6973SEric Dumazet u32 now = (u32)jiffies; 486adb03115SEric Dumazet u32 new, delta = 0; 48704ca6973SEric Dumazet 488355b590cSEric Dumazet if (old != now && cmpxchg(p_tstamp, old, now) == old) 48904ca6973SEric Dumazet delta = prandom_u32_max(now - old); 49004ca6973SEric Dumazet 491adb03115SEric Dumazet /* Do not use atomic_add_return() as it makes UBSAN unhappy */ 492adb03115SEric Dumazet do { 493adb03115SEric Dumazet old = (u32)atomic_read(p_id); 494adb03115SEric Dumazet new = old + delta + segs; 495adb03115SEric Dumazet } while (atomic_cmpxchg(p_id, old, new) != old); 496adb03115SEric Dumazet 497adb03115SEric Dumazet return new - segs; 49804ca6973SEric Dumazet } 49904ca6973SEric Dumazet EXPORT_SYMBOL(ip_idents_reserve); 50073f156a6SEric Dumazet 501b6a7719aSHannes Frederic Sowa void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) 5021da177e4SLinus Torvalds { 50373f156a6SEric Dumazet static u32 ip_idents_hashrnd __read_mostly; 50473f156a6SEric Dumazet u32 hash, id; 5051da177e4SLinus Torvalds 50673f156a6SEric Dumazet net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); 5071da177e4SLinus Torvalds 50804ca6973SEric Dumazet hash = jhash_3words((__force u32)iph->daddr, 50904ca6973SEric Dumazet (__force u32)iph->saddr, 510b6a7719aSHannes Frederic Sowa iph->protocol ^ net_hash_mix(net), 51104ca6973SEric Dumazet ip_idents_hashrnd); 51273f156a6SEric Dumazet id = ip_idents_reserve(hash, segs); 51373f156a6SEric Dumazet iph->id = htons(id); 5141da177e4SLinus Torvalds } 5154bc2f18bSEric Dumazet EXPORT_SYMBOL(__ip_select_ident); 5161da177e4SLinus Torvalds 517e2d118a1SLorenzo Colitti static void __build_flow_key(const struct net *net, struct flowi4 *fl4, 518e2d118a1SLorenzo Colitti const struct sock *sk, 5194895c771SDavid S. Miller const struct iphdr *iph, 5204895c771SDavid S. Miller int oif, u8 tos, 5214895c771SDavid S. Miller u8 prot, u32 mark, int flow_flags) 5224895c771SDavid S. Miller { 5234895c771SDavid S. Miller if (sk) { 5244895c771SDavid S. Miller const struct inet_sock *inet = inet_sk(sk); 5254895c771SDavid S. Miller 5264895c771SDavid S. Miller oif = sk->sk_bound_dev_if; 5274895c771SDavid S. Miller mark = sk->sk_mark; 5284895c771SDavid S. Miller tos = RT_CONN_FLAGS(sk); 5294895c771SDavid S. Miller prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; 5304895c771SDavid S. Miller } 5314895c771SDavid S. Miller flowi4_init_output(fl4, oif, mark, tos, 5324895c771SDavid S. Miller RT_SCOPE_UNIVERSE, prot, 5334895c771SDavid S. Miller flow_flags, 534e2d118a1SLorenzo Colitti iph->daddr, iph->saddr, 0, 0, 535e2d118a1SLorenzo Colitti sock_net_uid(net, sk)); 5364895c771SDavid S. Miller } 5374895c771SDavid S. Miller 5385abf7f7eSEric Dumazet static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, 5395abf7f7eSEric Dumazet const struct sock *sk) 5404895c771SDavid S. Miller { 541d109e61bSLorenzo Colitti const struct net *net = dev_net(skb->dev); 5424895c771SDavid S. Miller const struct iphdr *iph = ip_hdr(skb); 5434895c771SDavid S. Miller int oif = skb->dev->ifindex; 5444895c771SDavid S. Miller u8 tos = RT_TOS(iph->tos); 5454895c771SDavid S. Miller u8 prot = iph->protocol; 5464895c771SDavid S. Miller u32 mark = skb->mark; 5474895c771SDavid S. Miller 548d109e61bSLorenzo Colitti __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); 5494895c771SDavid S. Miller } 5504895c771SDavid S. Miller 5515abf7f7eSEric Dumazet static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) 5524895c771SDavid S. Miller { 5534895c771SDavid S. Miller const struct inet_sock *inet = inet_sk(sk); 5545abf7f7eSEric Dumazet const struct ip_options_rcu *inet_opt; 5554895c771SDavid S. Miller __be32 daddr = inet->inet_daddr; 5564895c771SDavid S. Miller 5574895c771SDavid S. Miller rcu_read_lock(); 5584895c771SDavid S. Miller inet_opt = rcu_dereference(inet->inet_opt); 5594895c771SDavid S. Miller if (inet_opt && inet_opt->opt.srr) 5604895c771SDavid S. Miller daddr = inet_opt->opt.faddr; 5614895c771SDavid S. Miller flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 5624895c771SDavid S. Miller RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 5634895c771SDavid S. Miller inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, 5644895c771SDavid S. Miller inet_sk_flowi_flags(sk), 565e2d118a1SLorenzo Colitti daddr, inet->inet_saddr, 0, 0, sk->sk_uid); 5664895c771SDavid S. Miller rcu_read_unlock(); 5674895c771SDavid S. Miller } 5684895c771SDavid S. Miller 5695abf7f7eSEric Dumazet static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, 5705abf7f7eSEric Dumazet const struct sk_buff *skb) 5714895c771SDavid S. Miller { 5724895c771SDavid S. Miller if (skb) 5734895c771SDavid S. Miller build_skb_flow_key(fl4, skb, sk); 5744895c771SDavid S. Miller else 5754895c771SDavid S. Miller build_sk_flow_key(fl4, sk); 5764895c771SDavid S. Miller } 5774895c771SDavid S. Miller 578c5038a83SDavid S. Miller static DEFINE_SPINLOCK(fnhe_lock); 5794895c771SDavid S. Miller 5802ffae99dSTimo Teräs static void fnhe_flush_routes(struct fib_nh_exception *fnhe) 5812ffae99dSTimo Teräs { 5822ffae99dSTimo Teräs struct rtable *rt; 5832ffae99dSTimo Teräs 5842ffae99dSTimo Teräs rt = rcu_dereference(fnhe->fnhe_rth_input); 5852ffae99dSTimo Teräs if (rt) { 5862ffae99dSTimo Teräs RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); 58795c47f9cSWei Wang dst_dev_put(&rt->dst); 5880830106cSWei Wang dst_release(&rt->dst); 5892ffae99dSTimo Teräs } 5902ffae99dSTimo Teräs rt = rcu_dereference(fnhe->fnhe_rth_output); 5912ffae99dSTimo Teräs if (rt) { 5922ffae99dSTimo Teräs RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); 59395c47f9cSWei Wang dst_dev_put(&rt->dst); 5940830106cSWei Wang dst_release(&rt->dst); 5952ffae99dSTimo Teräs } 5962ffae99dSTimo Teräs } 5972ffae99dSTimo Teräs 598aee06da6SJulian Anastasov static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) 5994895c771SDavid S. Miller { 6004895c771SDavid S. Miller struct fib_nh_exception *fnhe, *oldest; 6014895c771SDavid S. Miller 6024895c771SDavid S. Miller oldest = rcu_dereference(hash->chain); 6034895c771SDavid S. Miller for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; 6044895c771SDavid S. Miller fnhe = rcu_dereference(fnhe->fnhe_next)) { 6054895c771SDavid S. Miller if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) 6064895c771SDavid S. Miller oldest = fnhe; 6074895c771SDavid S. Miller } 6082ffae99dSTimo Teräs fnhe_flush_routes(oldest); 6094895c771SDavid S. Miller return oldest; 6104895c771SDavid S. Miller } 6114895c771SDavid S. Miller 612d3a25c98SDavid S. Miller static inline u32 fnhe_hashfun(__be32 daddr) 613d3a25c98SDavid S. Miller { 614d546c621SEric Dumazet static u32 fnhe_hashrnd __read_mostly; 615d3a25c98SDavid S. Miller u32 hval; 616d3a25c98SDavid S. Miller 617d546c621SEric Dumazet net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd)); 618d546c621SEric Dumazet hval = jhash_1word((__force u32) daddr, fnhe_hashrnd); 619d546c621SEric Dumazet return hash_32(hval, FNHE_HASH_SHIFT); 620d3a25c98SDavid S. Miller } 621d3a25c98SDavid S. Miller 622387aa65aSTimo Teräs static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) 623387aa65aSTimo Teräs { 624387aa65aSTimo Teräs rt->rt_pmtu = fnhe->fnhe_pmtu; 625d52e5a7eSSabrina Dubroca rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; 626387aa65aSTimo Teräs rt->dst.expires = fnhe->fnhe_expires; 627387aa65aSTimo Teräs 628387aa65aSTimo Teräs if (fnhe->fnhe_gw) { 629387aa65aSTimo Teräs rt->rt_flags |= RTCF_REDIRECTED; 630387aa65aSTimo Teräs rt->rt_gateway = fnhe->fnhe_gw; 631387aa65aSTimo Teräs rt->rt_uses_gateway = 1; 632387aa65aSTimo Teräs } 633387aa65aSTimo Teräs } 634387aa65aSTimo Teräs 635aee06da6SJulian Anastasov static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, 636d52e5a7eSSabrina Dubroca u32 pmtu, bool lock, unsigned long expires) 6374895c771SDavid S. Miller { 638aee06da6SJulian Anastasov struct fnhe_hash_bucket *hash; 6394895c771SDavid S. Miller struct fib_nh_exception *fnhe; 640387aa65aSTimo Teräs struct rtable *rt; 641cebe84c6SXin Long u32 genid, hval; 642387aa65aSTimo Teräs unsigned int i; 6434895c771SDavid S. Miller int depth; 644cebe84c6SXin Long 645cebe84c6SXin Long genid = fnhe_genid(dev_net(nh->nh_dev)); 646cebe84c6SXin Long hval = fnhe_hashfun(daddr); 6474895c771SDavid S. Miller 648c5038a83SDavid S. Miller spin_lock_bh(&fnhe_lock); 649aee06da6SJulian Anastasov 650caa41527SEric Dumazet hash = rcu_dereference(nh->nh_exceptions); 6514895c771SDavid S. Miller if (!hash) { 6526396bb22SKees Cook hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); 6534895c771SDavid S. Miller if (!hash) 654aee06da6SJulian Anastasov goto out_unlock; 655caa41527SEric Dumazet rcu_assign_pointer(nh->nh_exceptions, hash); 6564895c771SDavid S. Miller } 6574895c771SDavid S. Miller 6584895c771SDavid S. Miller hash += hval; 6594895c771SDavid S. Miller 6604895c771SDavid S. Miller depth = 0; 6614895c771SDavid S. Miller for (fnhe = rcu_dereference(hash->chain); fnhe; 6624895c771SDavid S. Miller fnhe = rcu_dereference(fnhe->fnhe_next)) { 6634895c771SDavid S. Miller if (fnhe->fnhe_daddr == daddr) 664aee06da6SJulian Anastasov break; 6654895c771SDavid S. Miller depth++; 6664895c771SDavid S. Miller } 6674895c771SDavid S. Miller 668aee06da6SJulian Anastasov if (fnhe) { 669cebe84c6SXin Long if (fnhe->fnhe_genid != genid) 670cebe84c6SXin Long fnhe->fnhe_genid = genid; 671aee06da6SJulian Anastasov if (gw) 672aee06da6SJulian Anastasov fnhe->fnhe_gw = gw; 673d52e5a7eSSabrina Dubroca if (pmtu) { 674aee06da6SJulian Anastasov fnhe->fnhe_pmtu = pmtu; 675d52e5a7eSSabrina Dubroca fnhe->fnhe_mtu_locked = lock; 676d52e5a7eSSabrina Dubroca } 677387aa65aSTimo Teräs fnhe->fnhe_expires = max(1UL, expires); 678387aa65aSTimo Teräs /* Update all cached dsts too */ 6792ffae99dSTimo Teräs rt = rcu_dereference(fnhe->fnhe_rth_input); 6802ffae99dSTimo Teräs if (rt) 6812ffae99dSTimo Teräs fill_route_from_fnhe(rt, fnhe); 6822ffae99dSTimo Teräs rt = rcu_dereference(fnhe->fnhe_rth_output); 683387aa65aSTimo Teräs if (rt) 684387aa65aSTimo Teräs fill_route_from_fnhe(rt, fnhe); 685aee06da6SJulian Anastasov } else { 686aee06da6SJulian Anastasov if (depth > FNHE_RECLAIM_DEPTH) 687aee06da6SJulian Anastasov fnhe = fnhe_oldest(hash); 688aee06da6SJulian Anastasov else { 6894895c771SDavid S. Miller fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); 6904895c771SDavid S. Miller if (!fnhe) 691aee06da6SJulian Anastasov goto out_unlock; 6924895c771SDavid S. Miller 6934895c771SDavid S. Miller fnhe->fnhe_next = hash->chain; 6944895c771SDavid S. Miller rcu_assign_pointer(hash->chain, fnhe); 695aee06da6SJulian Anastasov } 696cebe84c6SXin Long fnhe->fnhe_genid = genid; 6974895c771SDavid S. Miller fnhe->fnhe_daddr = daddr; 698aee06da6SJulian Anastasov fnhe->fnhe_gw = gw; 699aee06da6SJulian Anastasov fnhe->fnhe_pmtu = pmtu; 700d52e5a7eSSabrina Dubroca fnhe->fnhe_mtu_locked = lock; 70194720e3aSJulian Anastasov fnhe->fnhe_expires = max(1UL, expires); 702387aa65aSTimo Teräs 703387aa65aSTimo Teräs /* Exception created; mark the cached routes for the nexthop 704387aa65aSTimo Teräs * stale, so anyone caching it rechecks if this exception 705387aa65aSTimo Teräs * applies to them. 706387aa65aSTimo Teräs */ 7072ffae99dSTimo Teräs rt = rcu_dereference(nh->nh_rth_input); 7082ffae99dSTimo Teräs if (rt) 7092ffae99dSTimo Teräs rt->dst.obsolete = DST_OBSOLETE_KILL; 7102ffae99dSTimo Teräs 711387aa65aSTimo Teräs for_each_possible_cpu(i) { 712387aa65aSTimo Teräs struct rtable __rcu **prt; 713387aa65aSTimo Teräs prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); 714387aa65aSTimo Teräs rt = rcu_dereference(*prt); 715387aa65aSTimo Teräs if (rt) 716387aa65aSTimo Teräs rt->dst.obsolete = DST_OBSOLETE_KILL; 717387aa65aSTimo Teräs } 718aee06da6SJulian Anastasov } 719aee06da6SJulian Anastasov 7204895c771SDavid S. Miller fnhe->fnhe_stamp = jiffies; 721aee06da6SJulian Anastasov 722aee06da6SJulian Anastasov out_unlock: 723c5038a83SDavid S. Miller spin_unlock_bh(&fnhe_lock); 7244895c771SDavid S. Miller } 7254895c771SDavid S. Miller 726ceb33206SDavid S. Miller static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4, 727ceb33206SDavid S. Miller bool kill_route) 7281da177e4SLinus Torvalds { 729e47a185bSDavid S. Miller __be32 new_gw = icmp_hdr(skb)->un.gateway; 73094206125SDavid S. Miller __be32 old_gw = ip_hdr(skb)->saddr; 731e47a185bSDavid S. Miller struct net_device *dev = skb->dev; 732e47a185bSDavid S. Miller struct in_device *in_dev; 7334895c771SDavid S. Miller struct fib_result res; 734e47a185bSDavid S. Miller struct neighbour *n; 735317805b8SDenis V. Lunev struct net *net; 7361da177e4SLinus Torvalds 73794206125SDavid S. Miller switch (icmp_hdr(skb)->code & 7) { 73894206125SDavid S. Miller case ICMP_REDIR_NET: 73994206125SDavid S. Miller case ICMP_REDIR_NETTOS: 74094206125SDavid S. Miller case ICMP_REDIR_HOST: 74194206125SDavid S. Miller case ICMP_REDIR_HOSTTOS: 74294206125SDavid S. Miller break; 74394206125SDavid S. Miller 74494206125SDavid S. Miller default: 74594206125SDavid S. Miller return; 74694206125SDavid S. Miller } 74794206125SDavid S. Miller 748e47a185bSDavid S. Miller if (rt->rt_gateway != old_gw) 749e47a185bSDavid S. Miller return; 750e47a185bSDavid S. Miller 751e47a185bSDavid S. Miller in_dev = __in_dev_get_rcu(dev); 752e47a185bSDavid S. Miller if (!in_dev) 753e47a185bSDavid S. Miller return; 754e47a185bSDavid S. Miller 755c346dca1SYOSHIFUJI Hideaki net = dev_net(dev); 7569d4fb27dSJoe Perches if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || 7579d4fb27dSJoe Perches ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || 7589d4fb27dSJoe Perches ipv4_is_zeronet(new_gw)) 7591da177e4SLinus Torvalds goto reject_redirect; 7601da177e4SLinus Torvalds 7611da177e4SLinus Torvalds if (!IN_DEV_SHARED_MEDIA(in_dev)) { 7621da177e4SLinus Torvalds if (!inet_addr_onlink(in_dev, new_gw, old_gw)) 7631da177e4SLinus Torvalds goto reject_redirect; 7641da177e4SLinus Torvalds if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) 7651da177e4SLinus Torvalds goto reject_redirect; 7661da177e4SLinus Torvalds } else { 767317805b8SDenis V. Lunev if (inet_addr_type(net, new_gw) != RTN_UNICAST) 7681da177e4SLinus Torvalds goto reject_redirect; 7691da177e4SLinus Torvalds } 7701da177e4SLinus Torvalds 771969447f2SStephen Suryaputra Lin n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); 772969447f2SStephen Suryaputra Lin if (!n) 773969447f2SStephen Suryaputra Lin n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); 7742c1a4311SWANG Cong if (!IS_ERR(n)) { 775e47a185bSDavid S. Miller if (!(n->nud_state & NUD_VALID)) { 776e47a185bSDavid S. Miller neigh_event_send(n, NULL); 777e47a185bSDavid S. Miller } else { 7780eeb075fSAndy Gospodarek if (fib_lookup(net, fl4, &res, 0) == 0) { 7794895c771SDavid S. Miller struct fib_nh *nh = &FIB_RES_NH(res); 7804895c771SDavid S. Miller 781aee06da6SJulian Anastasov update_or_create_fnhe(nh, fl4->daddr, new_gw, 782d52e5a7eSSabrina Dubroca 0, false, 783d52e5a7eSSabrina Dubroca jiffies + ip_rt_gc_timeout); 7844895c771SDavid S. Miller } 785ceb33206SDavid S. Miller if (kill_route) 786ceb33206SDavid S. Miller rt->dst.obsolete = DST_OBSOLETE_KILL; 787e47a185bSDavid S. Miller call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); 788e47a185bSDavid S. Miller } 789e47a185bSDavid S. Miller neigh_release(n); 790e47a185bSDavid S. Miller } 791e47a185bSDavid S. Miller return; 792e47a185bSDavid S. Miller 793e47a185bSDavid S. Miller reject_redirect: 794e47a185bSDavid S. Miller #ifdef CONFIG_IP_ROUTE_VERBOSE 79599ee038dSDavid S. Miller if (IN_DEV_LOG_MARTIANS(in_dev)) { 79699ee038dSDavid S. Miller const struct iphdr *iph = (const struct iphdr *) skb->data; 79799ee038dSDavid S. Miller __be32 daddr = iph->daddr; 79899ee038dSDavid S. Miller __be32 saddr = iph->saddr; 79999ee038dSDavid S. Miller 800e47a185bSDavid S. Miller net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" 801e47a185bSDavid S. Miller " Advised path = %pI4 -> %pI4\n", 802e47a185bSDavid S. Miller &old_gw, dev->name, &new_gw, 803e47a185bSDavid S. Miller &saddr, &daddr); 80499ee038dSDavid S. Miller } 805e47a185bSDavid S. Miller #endif 806e47a185bSDavid S. Miller ; 807e47a185bSDavid S. Miller } 808e47a185bSDavid S. Miller 8094895c771SDavid S. Miller static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 8104895c771SDavid S. Miller { 8114895c771SDavid S. Miller struct rtable *rt; 8124895c771SDavid S. Miller struct flowi4 fl4; 813f96ef988SMichal Kubecek const struct iphdr *iph = (const struct iphdr *) skb->data; 8147d995694SLorenzo Colitti struct net *net = dev_net(skb->dev); 815f96ef988SMichal Kubecek int oif = skb->dev->ifindex; 816f96ef988SMichal Kubecek u8 tos = RT_TOS(iph->tos); 817f96ef988SMichal Kubecek u8 prot = iph->protocol; 818f96ef988SMichal Kubecek u32 mark = skb->mark; 8194895c771SDavid S. Miller 8204895c771SDavid S. Miller rt = (struct rtable *) dst; 8214895c771SDavid S. Miller 8227d995694SLorenzo Colitti __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); 823ceb33206SDavid S. Miller __ip_do_redirect(rt, skb, &fl4, true); 8244895c771SDavid S. Miller } 8254895c771SDavid S. Miller 8261da177e4SLinus Torvalds static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 8271da177e4SLinus Torvalds { 8281da177e4SLinus Torvalds struct rtable *rt = (struct rtable *)dst; 8291da177e4SLinus Torvalds struct dst_entry *ret = dst; 8301da177e4SLinus Torvalds 8311da177e4SLinus Torvalds if (rt) { 832d11a4dc1STimo Teräs if (dst->obsolete > 0) { 8331da177e4SLinus Torvalds ip_rt_put(rt); 8341da177e4SLinus Torvalds ret = NULL; 8355943634fSDavid S. Miller } else if ((rt->rt_flags & RTCF_REDIRECTED) || 8365943634fSDavid S. Miller rt->dst.expires) { 83789aef892SDavid S. Miller ip_rt_put(rt); 8381da177e4SLinus Torvalds ret = NULL; 8391da177e4SLinus Torvalds } 8401da177e4SLinus Torvalds } 8411da177e4SLinus Torvalds return ret; 8421da177e4SLinus Torvalds } 8431da177e4SLinus Torvalds 8441da177e4SLinus Torvalds /* 8451da177e4SLinus Torvalds * Algorithm: 8461da177e4SLinus Torvalds * 1. The first ip_rt_redirect_number redirects are sent 8471da177e4SLinus Torvalds * with exponential backoff, then we stop sending them at all, 8481da177e4SLinus Torvalds * assuming that the host ignores our redirects. 8491da177e4SLinus Torvalds * 2. If we did not see packets requiring redirects 8501da177e4SLinus Torvalds * during ip_rt_redirect_silence, we assume that the host 8511da177e4SLinus Torvalds * forgot redirected route and start to send redirects again. 8521da177e4SLinus Torvalds * 8531da177e4SLinus Torvalds * This algorithm is much cheaper and more intelligent than dumb load limiting 8541da177e4SLinus Torvalds * in icmp.c. 8551da177e4SLinus Torvalds * 8561da177e4SLinus Torvalds * NOTE. Do not forget to inhibit load limiting for redirects (redundant) 8571da177e4SLinus Torvalds * and "frag. need" (breaks PMTU discovery) in icmp.c. 8581da177e4SLinus Torvalds */ 8591da177e4SLinus Torvalds 8601da177e4SLinus Torvalds void ip_rt_send_redirect(struct sk_buff *skb) 8611da177e4SLinus Torvalds { 862511c3f92SEric Dumazet struct rtable *rt = skb_rtable(skb); 86330038fc6SEric Dumazet struct in_device *in_dev; 86492d86829SDavid S. Miller struct inet_peer *peer; 8651d861aa4SDavid S. Miller struct net *net; 86630038fc6SEric Dumazet int log_martians; 867192132b9SDavid Ahern int vif; 8681da177e4SLinus Torvalds 86930038fc6SEric Dumazet rcu_read_lock(); 870d8d1f30bSChangli Gao in_dev = __in_dev_get_rcu(rt->dst.dev); 87130038fc6SEric Dumazet if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { 87230038fc6SEric Dumazet rcu_read_unlock(); 8731da177e4SLinus Torvalds return; 87430038fc6SEric Dumazet } 87530038fc6SEric Dumazet log_martians = IN_DEV_LOG_MARTIANS(in_dev); 876385add90SDavid Ahern vif = l3mdev_master_ifindex_rcu(rt->dst.dev); 87730038fc6SEric Dumazet rcu_read_unlock(); 8781da177e4SLinus Torvalds 8791d861aa4SDavid S. Miller net = dev_net(rt->dst.dev); 880192132b9SDavid Ahern peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1); 88192d86829SDavid S. Miller if (!peer) { 882e81da0e1SJulian Anastasov icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, 883e81da0e1SJulian Anastasov rt_nexthop(rt, ip_hdr(skb)->daddr)); 88492d86829SDavid S. Miller return; 88592d86829SDavid S. Miller } 88692d86829SDavid S. Miller 8871da177e4SLinus Torvalds /* No redirected packets during ip_rt_redirect_silence; 8881da177e4SLinus Torvalds * reset the algorithm. 8891da177e4SLinus Torvalds */ 89092d86829SDavid S. Miller if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) 89192d86829SDavid S. Miller peer->rate_tokens = 0; 8921da177e4SLinus Torvalds 8931da177e4SLinus Torvalds /* Too many ignored redirects; do not send anything 894d8d1f30bSChangli Gao * set dst.rate_last to the last seen redirected packet. 8951da177e4SLinus Torvalds */ 89692d86829SDavid S. Miller if (peer->rate_tokens >= ip_rt_redirect_number) { 89792d86829SDavid S. Miller peer->rate_last = jiffies; 8981d861aa4SDavid S. Miller goto out_put_peer; 8991da177e4SLinus Torvalds } 9001da177e4SLinus Torvalds 9011da177e4SLinus Torvalds /* Check for load limit; set rate_last to the latest sent 9021da177e4SLinus Torvalds * redirect. 9031da177e4SLinus Torvalds */ 90492d86829SDavid S. Miller if (peer->rate_tokens == 0 || 90514fb8a76SLi Yewang time_after(jiffies, 90692d86829SDavid S. Miller (peer->rate_last + 90792d86829SDavid S. Miller (ip_rt_redirect_load << peer->rate_tokens)))) { 908e81da0e1SJulian Anastasov __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); 909e81da0e1SJulian Anastasov 910e81da0e1SJulian Anastasov icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); 91192d86829SDavid S. Miller peer->rate_last = jiffies; 91292d86829SDavid S. Miller ++peer->rate_tokens; 9131da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE 91430038fc6SEric Dumazet if (log_martians && 915e87cc472SJoe Perches peer->rate_tokens == ip_rt_redirect_number) 916e87cc472SJoe Perches net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", 91792101b3bSDavid S. Miller &ip_hdr(skb)->saddr, inet_iif(skb), 918e81da0e1SJulian Anastasov &ip_hdr(skb)->daddr, &gw); 9191da177e4SLinus Torvalds #endif 9201da177e4SLinus Torvalds } 9211d861aa4SDavid S. Miller out_put_peer: 9221d861aa4SDavid S. Miller inet_putpeer(peer); 9231da177e4SLinus Torvalds } 9241da177e4SLinus Torvalds 9251da177e4SLinus Torvalds static int ip_error(struct sk_buff *skb) 9261da177e4SLinus Torvalds { 927511c3f92SEric Dumazet struct rtable *rt = skb_rtable(skb); 928e2c0dc1fSStephen Suryaputra struct net_device *dev = skb->dev; 929e2c0dc1fSStephen Suryaputra struct in_device *in_dev; 93092d86829SDavid S. Miller struct inet_peer *peer; 9311da177e4SLinus Torvalds unsigned long now; 932251da413SDavid S. Miller struct net *net; 93392d86829SDavid S. Miller bool send; 9341da177e4SLinus Torvalds int code; 9351da177e4SLinus Torvalds 936e2c0dc1fSStephen Suryaputra if (netif_is_l3_master(skb->dev)) { 937e2c0dc1fSStephen Suryaputra dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); 938e2c0dc1fSStephen Suryaputra if (!dev) 939e2c0dc1fSStephen Suryaputra goto out; 940e2c0dc1fSStephen Suryaputra } 941e2c0dc1fSStephen Suryaputra 942e2c0dc1fSStephen Suryaputra in_dev = __in_dev_get_rcu(dev); 943e2c0dc1fSStephen Suryaputra 944381c759dSEric W. Biederman /* IP on this device is disabled. */ 945381c759dSEric W. Biederman if (!in_dev) 946381c759dSEric W. Biederman goto out; 947381c759dSEric W. Biederman 948251da413SDavid S. Miller net = dev_net(rt->dst.dev); 949251da413SDavid S. Miller if (!IN_DEV_FORWARD(in_dev)) { 950251da413SDavid S. Miller switch (rt->dst.error) { 951251da413SDavid S. Miller case EHOSTUNREACH: 952b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); 953251da413SDavid S. Miller break; 954251da413SDavid S. Miller 955251da413SDavid S. Miller case ENETUNREACH: 956b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); 957251da413SDavid S. Miller break; 958251da413SDavid S. Miller } 959251da413SDavid S. Miller goto out; 960251da413SDavid S. Miller } 961251da413SDavid S. Miller 962d8d1f30bSChangli Gao switch (rt->dst.error) { 9631da177e4SLinus Torvalds case EINVAL: 9641da177e4SLinus Torvalds default: 9651da177e4SLinus Torvalds goto out; 9661da177e4SLinus Torvalds case EHOSTUNREACH: 9671da177e4SLinus Torvalds code = ICMP_HOST_UNREACH; 9681da177e4SLinus Torvalds break; 9691da177e4SLinus Torvalds case ENETUNREACH: 9701da177e4SLinus Torvalds code = ICMP_NET_UNREACH; 971b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); 9721da177e4SLinus Torvalds break; 9731da177e4SLinus Torvalds case EACCES: 9741da177e4SLinus Torvalds code = ICMP_PKT_FILTERED; 9751da177e4SLinus Torvalds break; 9761da177e4SLinus Torvalds } 9771da177e4SLinus Torvalds 978192132b9SDavid Ahern peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 979385add90SDavid Ahern l3mdev_master_ifindex(skb->dev), 1); 98092d86829SDavid S. Miller 98192d86829SDavid S. Miller send = true; 98292d86829SDavid S. Miller if (peer) { 9831da177e4SLinus Torvalds now = jiffies; 98492d86829SDavid S. Miller peer->rate_tokens += now - peer->rate_last; 98592d86829SDavid S. Miller if (peer->rate_tokens > ip_rt_error_burst) 98692d86829SDavid S. Miller peer->rate_tokens = ip_rt_error_burst; 98792d86829SDavid S. Miller peer->rate_last = now; 98892d86829SDavid S. Miller if (peer->rate_tokens >= ip_rt_error_cost) 98992d86829SDavid S. Miller peer->rate_tokens -= ip_rt_error_cost; 99092d86829SDavid S. Miller else 99192d86829SDavid S. Miller send = false; 9921d861aa4SDavid S. Miller inet_putpeer(peer); 9931da177e4SLinus Torvalds } 99492d86829SDavid S. Miller if (send) 99592d86829SDavid S. Miller icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 9961da177e4SLinus Torvalds 9971da177e4SLinus Torvalds out: kfree_skb(skb); 9981da177e4SLinus Torvalds return 0; 9991da177e4SLinus Torvalds } 10001da177e4SLinus Torvalds 1001d851c12bSSteffen Klassert static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) 10021da177e4SLinus Torvalds { 1003d851c12bSSteffen Klassert struct dst_entry *dst = &rt->dst; 10044895c771SDavid S. Miller struct fib_result res; 1005d52e5a7eSSabrina Dubroca bool lock = false; 10062c8cec5cSDavid S. Miller 1007d52e5a7eSSabrina Dubroca if (ip_mtu_locked(dst)) 1008fa1e492aSSteffen Klassert return; 1009fa1e492aSSteffen Klassert 1010cb6ccf09SHerbert Xu if (ipv4_mtu(dst) < mtu) 10113cdaa5beSLi Wei return; 10123cdaa5beSLi Wei 1013d52e5a7eSSabrina Dubroca if (mtu < ip_rt_min_pmtu) { 1014d52e5a7eSSabrina Dubroca lock = true; 10151da177e4SLinus Torvalds mtu = ip_rt_min_pmtu; 1016d52e5a7eSSabrina Dubroca } 101746af3180SHiroaki SHIMODA 1018f016229eSTimo Teräs if (rt->rt_pmtu == mtu && 1019f016229eSTimo Teräs time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) 1020f016229eSTimo Teräs return; 1021f016229eSTimo Teräs 1022c5ae7d41SEric Dumazet rcu_read_lock(); 10230eeb075fSAndy Gospodarek if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { 10244895c771SDavid S. Miller struct fib_nh *nh = &FIB_RES_NH(res); 10254895c771SDavid S. Miller 1026d52e5a7eSSabrina Dubroca update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, 1027aee06da6SJulian Anastasov jiffies + ip_rt_mtu_expires); 10284895c771SDavid S. Miller } 1029c5ae7d41SEric Dumazet rcu_read_unlock(); 10301da177e4SLinus Torvalds } 10311da177e4SLinus Torvalds 10324895c771SDavid S. Miller static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 10334895c771SDavid S. Miller struct sk_buff *skb, u32 mtu) 10344895c771SDavid S. Miller { 10354895c771SDavid S. Miller struct rtable *rt = (struct rtable *) dst; 10364895c771SDavid S. Miller struct flowi4 fl4; 10374895c771SDavid S. Miller 10384895c771SDavid S. Miller ip_rt_build_flow_key(&fl4, sk, skb); 1039d851c12bSSteffen Klassert __ip_rt_update_pmtu(rt, &fl4, mtu); 10404895c771SDavid S. Miller } 10414895c771SDavid S. Miller 104236393395SDavid S. Miller void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, 104336393395SDavid S. Miller int oif, u32 mark, u8 protocol, int flow_flags) 104436393395SDavid S. Miller { 104536393395SDavid S. Miller const struct iphdr *iph = (const struct iphdr *) skb->data; 104636393395SDavid S. Miller struct flowi4 fl4; 104736393395SDavid S. Miller struct rtable *rt; 104836393395SDavid S. Miller 10491b3c61dcSLorenzo Colitti if (!mark) 10501b3c61dcSLorenzo Colitti mark = IP4_REPLY_MARK(net, skb->mark); 10511b3c61dcSLorenzo Colitti 1052e2d118a1SLorenzo Colitti __build_flow_key(net, &fl4, NULL, iph, oif, 10534895c771SDavid S. Miller RT_TOS(iph->tos), protocol, mark, flow_flags); 105436393395SDavid S. Miller rt = __ip_route_output_key(net, &fl4); 105536393395SDavid S. Miller if (!IS_ERR(rt)) { 10564895c771SDavid S. Miller __ip_rt_update_pmtu(rt, &fl4, mtu); 105736393395SDavid S. Miller ip_rt_put(rt); 105836393395SDavid S. Miller } 105936393395SDavid S. Miller } 106036393395SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_update_pmtu); 106136393395SDavid S. Miller 10629cb3a50cSSteffen Klassert static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) 106336393395SDavid S. Miller { 10644895c771SDavid S. Miller const struct iphdr *iph = (const struct iphdr *) skb->data; 10654895c771SDavid S. Miller struct flowi4 fl4; 10664895c771SDavid S. Miller struct rtable *rt; 106736393395SDavid S. Miller 1068e2d118a1SLorenzo Colitti __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); 10691b3c61dcSLorenzo Colitti 10701b3c61dcSLorenzo Colitti if (!fl4.flowi4_mark) 10711b3c61dcSLorenzo Colitti fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); 10721b3c61dcSLorenzo Colitti 10734895c771SDavid S. Miller rt = __ip_route_output_key(sock_net(sk), &fl4); 10744895c771SDavid S. Miller if (!IS_ERR(rt)) { 10754895c771SDavid S. Miller __ip_rt_update_pmtu(rt, &fl4, mtu); 10764895c771SDavid S. Miller ip_rt_put(rt); 10774895c771SDavid S. Miller } 107836393395SDavid S. Miller } 10799cb3a50cSSteffen Klassert 10809cb3a50cSSteffen Klassert void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) 10819cb3a50cSSteffen Klassert { 10829cb3a50cSSteffen Klassert const struct iphdr *iph = (const struct iphdr *) skb->data; 10839cb3a50cSSteffen Klassert struct flowi4 fl4; 10849cb3a50cSSteffen Klassert struct rtable *rt; 10857f502361SEric Dumazet struct dst_entry *odst = NULL; 1086b44108dbSSteffen Klassert bool new = false; 1087e2d118a1SLorenzo Colitti struct net *net = sock_net(sk); 10889cb3a50cSSteffen Klassert 10899cb3a50cSSteffen Klassert bh_lock_sock(sk); 1090482fc609SHannes Frederic Sowa 1091482fc609SHannes Frederic Sowa if (!ip_sk_accept_pmtu(sk)) 1092482fc609SHannes Frederic Sowa goto out; 1093482fc609SHannes Frederic Sowa 10947f502361SEric Dumazet odst = sk_dst_get(sk); 10959cb3a50cSSteffen Klassert 10967f502361SEric Dumazet if (sock_owned_by_user(sk) || !odst) { 10979cb3a50cSSteffen Klassert __ipv4_sk_update_pmtu(skb, sk, mtu); 10989cb3a50cSSteffen Klassert goto out; 10999cb3a50cSSteffen Klassert } 11009cb3a50cSSteffen Klassert 1101e2d118a1SLorenzo Colitti __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); 11029cb3a50cSSteffen Klassert 11037f502361SEric Dumazet rt = (struct rtable *)odst; 110451456b29SIan Morris if (odst->obsolete && !odst->ops->check(odst, 0)) { 11059cb3a50cSSteffen Klassert rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 11069cb3a50cSSteffen Klassert if (IS_ERR(rt)) 11079cb3a50cSSteffen Klassert goto out; 1108b44108dbSSteffen Klassert 1109b44108dbSSteffen Klassert new = true; 11109cb3a50cSSteffen Klassert } 11119cb3a50cSSteffen Klassert 11120f6c480fSDavid Miller __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu); 11139cb3a50cSSteffen Klassert 11147f502361SEric Dumazet if (!dst_check(&rt->dst, 0)) { 1115b44108dbSSteffen Klassert if (new) 1116b44108dbSSteffen Klassert dst_release(&rt->dst); 1117b44108dbSSteffen Klassert 11189cb3a50cSSteffen Klassert rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 11199cb3a50cSSteffen Klassert if (IS_ERR(rt)) 11209cb3a50cSSteffen Klassert goto out; 11219cb3a50cSSteffen Klassert 1122b44108dbSSteffen Klassert new = true; 11239cb3a50cSSteffen Klassert } 11249cb3a50cSSteffen Klassert 1125b44108dbSSteffen Klassert if (new) 11267f502361SEric Dumazet sk_dst_set(sk, &rt->dst); 11279cb3a50cSSteffen Klassert 11289cb3a50cSSteffen Klassert out: 11299cb3a50cSSteffen Klassert bh_unlock_sock(sk); 11307f502361SEric Dumazet dst_release(odst); 11319cb3a50cSSteffen Klassert } 113236393395SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); 1133f39925dbSDavid S. Miller 1134b42597e2SDavid S. Miller void ipv4_redirect(struct sk_buff *skb, struct net *net, 1135b42597e2SDavid S. Miller int oif, u32 mark, u8 protocol, int flow_flags) 1136b42597e2SDavid S. Miller { 1137b42597e2SDavid S. Miller const struct iphdr *iph = (const struct iphdr *) skb->data; 1138b42597e2SDavid S. Miller struct flowi4 fl4; 1139b42597e2SDavid S. Miller struct rtable *rt; 1140b42597e2SDavid S. Miller 1141e2d118a1SLorenzo Colitti __build_flow_key(net, &fl4, NULL, iph, oif, 11424895c771SDavid S. Miller RT_TOS(iph->tos), protocol, mark, flow_flags); 1143b42597e2SDavid S. Miller rt = __ip_route_output_key(net, &fl4); 1144b42597e2SDavid S. Miller if (!IS_ERR(rt)) { 1145ceb33206SDavid S. Miller __ip_do_redirect(rt, skb, &fl4, false); 1146b42597e2SDavid S. Miller ip_rt_put(rt); 1147b42597e2SDavid S. Miller } 1148b42597e2SDavid S. Miller } 1149b42597e2SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_redirect); 1150b42597e2SDavid S. Miller 1151b42597e2SDavid S. Miller void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) 1152b42597e2SDavid S. Miller { 11534895c771SDavid S. Miller const struct iphdr *iph = (const struct iphdr *) skb->data; 11544895c771SDavid S. Miller struct flowi4 fl4; 11554895c771SDavid S. Miller struct rtable *rt; 1156e2d118a1SLorenzo Colitti struct net *net = sock_net(sk); 1157b42597e2SDavid S. Miller 1158e2d118a1SLorenzo Colitti __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); 1159e2d118a1SLorenzo Colitti rt = __ip_route_output_key(net, &fl4); 11604895c771SDavid S. Miller if (!IS_ERR(rt)) { 1161ceb33206SDavid S. Miller __ip_do_redirect(rt, skb, &fl4, false); 11624895c771SDavid S. Miller ip_rt_put(rt); 11634895c771SDavid S. Miller } 1164b42597e2SDavid S. Miller } 1165b42597e2SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_sk_redirect); 1166b42597e2SDavid S. Miller 1167efbc368dSDavid S. Miller static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1168efbc368dSDavid S. Miller { 1169efbc368dSDavid S. Miller struct rtable *rt = (struct rtable *) dst; 1170efbc368dSDavid S. Miller 1171ceb33206SDavid S. Miller /* All IPV4 dsts are created with ->obsolete set to the value 1172ceb33206SDavid S. Miller * DST_OBSOLETE_FORCE_CHK which forces validation calls down 1173ceb33206SDavid S. Miller * into this function always. 1174ceb33206SDavid S. Miller * 1175387aa65aSTimo Teräs * When a PMTU/redirect information update invalidates a route, 1176387aa65aSTimo Teräs * this is indicated by setting obsolete to DST_OBSOLETE_KILL or 1177387aa65aSTimo Teräs * DST_OBSOLETE_DEAD by dst_free(). 1178ceb33206SDavid S. Miller */ 1179387aa65aSTimo Teräs if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) 1180efbc368dSDavid S. Miller return NULL; 1181d11a4dc1STimo Teräs return dst; 11821da177e4SLinus Torvalds } 11831da177e4SLinus Torvalds 11841da177e4SLinus Torvalds static void ipv4_link_failure(struct sk_buff *skb) 11851da177e4SLinus Torvalds { 11861da177e4SLinus Torvalds struct rtable *rt; 11871da177e4SLinus Torvalds 11881da177e4SLinus Torvalds icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 11891da177e4SLinus Torvalds 1190511c3f92SEric Dumazet rt = skb_rtable(skb); 11915943634fSDavid S. Miller if (rt) 11925943634fSDavid S. Miller dst_set_expires(&rt->dst, 0); 11932c8cec5cSDavid S. Miller } 11941da177e4SLinus Torvalds 1195ede2059dSEric W. Biederman static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb) 11961da177e4SLinus Torvalds { 119791df42beSJoe Perches pr_debug("%s: %pI4 -> %pI4, %s\n", 119891df42beSJoe Perches __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, 11991da177e4SLinus Torvalds skb->dev ? skb->dev->name : "?"); 12001da177e4SLinus Torvalds kfree_skb(skb); 1201c378a9c0SDave Jones WARN_ON(1); 12021da177e4SLinus Torvalds return 0; 12031da177e4SLinus Torvalds } 12041da177e4SLinus Torvalds 12051da177e4SLinus Torvalds /* 12061da177e4SLinus Torvalds We do not cache source address of outgoing interface, 12071da177e4SLinus Torvalds because it is used only by IP RR, TS and SRR options, 12081da177e4SLinus Torvalds so that it out of fast path. 12091da177e4SLinus Torvalds 12101da177e4SLinus Torvalds BTW remember: "addr" is allowed to be not aligned 12111da177e4SLinus Torvalds in IP options! 12121da177e4SLinus Torvalds */ 12131da177e4SLinus Torvalds 12148e36360aSDavid S. Miller void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) 12151da177e4SLinus Torvalds { 1216a61ced5dSAl Viro __be32 src; 12171da177e4SLinus Torvalds 1218c7537967SDavid S. Miller if (rt_is_output_route(rt)) 1219c5be24ffSDavid S. Miller src = ip_hdr(skb)->saddr; 1220ebc0ffaeSEric Dumazet else { 12218e36360aSDavid S. Miller struct fib_result res; 12228e36360aSDavid S. Miller struct flowi4 fl4; 12238e36360aSDavid S. Miller struct iphdr *iph; 12248e36360aSDavid S. Miller 12258e36360aSDavid S. Miller iph = ip_hdr(skb); 12268e36360aSDavid S. Miller 12278e36360aSDavid S. Miller memset(&fl4, 0, sizeof(fl4)); 12288e36360aSDavid S. Miller fl4.daddr = iph->daddr; 12298e36360aSDavid S. Miller fl4.saddr = iph->saddr; 1230b0fe4a31SJulian Anastasov fl4.flowi4_tos = RT_TOS(iph->tos); 12318e36360aSDavid S. Miller fl4.flowi4_oif = rt->dst.dev->ifindex; 12328e36360aSDavid S. Miller fl4.flowi4_iif = skb->dev->ifindex; 12338e36360aSDavid S. Miller fl4.flowi4_mark = skb->mark; 12345e2b61f7SDavid S. Miller 1235ebc0ffaeSEric Dumazet rcu_read_lock(); 12360eeb075fSAndy Gospodarek if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) 1237436c3b66SDavid S. Miller src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); 1238ebc0ffaeSEric Dumazet else 1239f8126f1dSDavid S. Miller src = inet_select_addr(rt->dst.dev, 1240f8126f1dSDavid S. Miller rt_nexthop(rt, iph->daddr), 12411da177e4SLinus Torvalds RT_SCOPE_UNIVERSE); 1242ebc0ffaeSEric Dumazet rcu_read_unlock(); 1243ebc0ffaeSEric Dumazet } 12441da177e4SLinus Torvalds memcpy(addr, &src, 4); 12451da177e4SLinus Torvalds } 12461da177e4SLinus Torvalds 1247c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 12481da177e4SLinus Torvalds static void set_class_tag(struct rtable *rt, u32 tag) 12491da177e4SLinus Torvalds { 1250d8d1f30bSChangli Gao if (!(rt->dst.tclassid & 0xFFFF)) 1251d8d1f30bSChangli Gao rt->dst.tclassid |= tag & 0xFFFF; 1252d8d1f30bSChangli Gao if (!(rt->dst.tclassid & 0xFFFF0000)) 1253d8d1f30bSChangli Gao rt->dst.tclassid |= tag & 0xFFFF0000; 12541da177e4SLinus Torvalds } 12551da177e4SLinus Torvalds #endif 12561da177e4SLinus Torvalds 12570dbaee3bSDavid S. Miller static unsigned int ipv4_default_advmss(const struct dst_entry *dst) 12580dbaee3bSDavid S. Miller { 12597ed14d97SGao Feng unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); 1260164a5e7aSEric Dumazet unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, 12610dbaee3bSDavid S. Miller ip_rt_min_advmss); 12627ed14d97SGao Feng 12637ed14d97SGao Feng return min(advmss, IPV4_MAX_PMTU - header_size); 12640dbaee3bSDavid S. Miller } 12650dbaee3bSDavid S. Miller 1266ebb762f2SSteffen Klassert static unsigned int ipv4_mtu(const struct dst_entry *dst) 1267d33e4553SDavid S. Miller { 1268261663b0SSteffen Klassert const struct rtable *rt = (const struct rtable *) dst; 12695943634fSDavid S. Miller unsigned int mtu = rt->rt_pmtu; 12705943634fSDavid S. Miller 127198d75c37SAlexander Duyck if (!mtu || time_after_eq(jiffies, rt->dst.expires)) 12725943634fSDavid S. Miller mtu = dst_metric_raw(dst, RTAX_MTU); 1273618f9bc7SSteffen Klassert 127438d523e2SSteffen Klassert if (mtu) 1275618f9bc7SSteffen Klassert return mtu; 1276618f9bc7SSteffen Klassert 1277c780a049SEric Dumazet mtu = READ_ONCE(dst->dev->mtu); 1278d33e4553SDavid S. Miller 1279d52e5a7eSSabrina Dubroca if (unlikely(ip_mtu_locked(dst))) { 1280155e8336SJulian Anastasov if (rt->rt_uses_gateway && mtu > 576) 1281d33e4553SDavid S. Miller mtu = 576; 1282d33e4553SDavid S. Miller } 1283d33e4553SDavid S. Miller 128414972cbdSRoopa Prabhu mtu = min_t(unsigned int, mtu, IP_MAX_MTU); 128514972cbdSRoopa Prabhu 128614972cbdSRoopa Prabhu return mtu - lwtunnel_headroom(dst->lwtstate, mtu); 1287d33e4553SDavid S. Miller } 1288d33e4553SDavid S. Miller 128994720e3aSJulian Anastasov static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) 129094720e3aSJulian Anastasov { 129194720e3aSJulian Anastasov struct fnhe_hash_bucket *hash; 129294720e3aSJulian Anastasov struct fib_nh_exception *fnhe, __rcu **fnhe_p; 129394720e3aSJulian Anastasov u32 hval = fnhe_hashfun(daddr); 129494720e3aSJulian Anastasov 129594720e3aSJulian Anastasov spin_lock_bh(&fnhe_lock); 129694720e3aSJulian Anastasov 129794720e3aSJulian Anastasov hash = rcu_dereference_protected(nh->nh_exceptions, 129894720e3aSJulian Anastasov lockdep_is_held(&fnhe_lock)); 129994720e3aSJulian Anastasov hash += hval; 130094720e3aSJulian Anastasov 130194720e3aSJulian Anastasov fnhe_p = &hash->chain; 130294720e3aSJulian Anastasov fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); 130394720e3aSJulian Anastasov while (fnhe) { 130494720e3aSJulian Anastasov if (fnhe->fnhe_daddr == daddr) { 130594720e3aSJulian Anastasov rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( 130694720e3aSJulian Anastasov fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); 130794720e3aSJulian Anastasov fnhe_flush_routes(fnhe); 130894720e3aSJulian Anastasov kfree_rcu(fnhe, rcu); 130994720e3aSJulian Anastasov break; 131094720e3aSJulian Anastasov } 131194720e3aSJulian Anastasov fnhe_p = &fnhe->fnhe_next; 131294720e3aSJulian Anastasov fnhe = rcu_dereference_protected(fnhe->fnhe_next, 131394720e3aSJulian Anastasov lockdep_is_held(&fnhe_lock)); 131494720e3aSJulian Anastasov } 131594720e3aSJulian Anastasov 131694720e3aSJulian Anastasov spin_unlock_bh(&fnhe_lock); 131794720e3aSJulian Anastasov } 131894720e3aSJulian Anastasov 1319f2bb4bedSDavid S. Miller static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) 13204895c771SDavid S. Miller { 1321caa41527SEric Dumazet struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); 13224895c771SDavid S. Miller struct fib_nh_exception *fnhe; 13234895c771SDavid S. Miller u32 hval; 13244895c771SDavid S. Miller 1325f2bb4bedSDavid S. Miller if (!hash) 1326f2bb4bedSDavid S. Miller return NULL; 1327f2bb4bedSDavid S. Miller 1328d3a25c98SDavid S. Miller hval = fnhe_hashfun(daddr); 13294895c771SDavid S. Miller 13304895c771SDavid S. Miller for (fnhe = rcu_dereference(hash[hval].chain); fnhe; 13314895c771SDavid S. Miller fnhe = rcu_dereference(fnhe->fnhe_next)) { 133294720e3aSJulian Anastasov if (fnhe->fnhe_daddr == daddr) { 133394720e3aSJulian Anastasov if (fnhe->fnhe_expires && 133494720e3aSJulian Anastasov time_after(jiffies, fnhe->fnhe_expires)) { 133594720e3aSJulian Anastasov ip_del_fnhe(nh, daddr); 133694720e3aSJulian Anastasov break; 133794720e3aSJulian Anastasov } 1338f2bb4bedSDavid S. Miller return fnhe; 1339f2bb4bedSDavid S. Miller } 134094720e3aSJulian Anastasov } 1341f2bb4bedSDavid S. Miller return NULL; 1342f2bb4bedSDavid S. Miller } 1343f2bb4bedSDavid S. Miller 134450d889b1SDavid Ahern /* MTU selection: 134550d889b1SDavid Ahern * 1. mtu on route is locked - use it 134650d889b1SDavid Ahern * 2. mtu from nexthop exception 134750d889b1SDavid Ahern * 3. mtu from egress device 134850d889b1SDavid Ahern */ 134950d889b1SDavid Ahern 135050d889b1SDavid Ahern u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) 135150d889b1SDavid Ahern { 135250d889b1SDavid Ahern struct fib_info *fi = res->fi; 135350d889b1SDavid Ahern struct fib_nh *nh = &fi->fib_nh[res->nh_sel]; 135450d889b1SDavid Ahern struct net_device *dev = nh->nh_dev; 135550d889b1SDavid Ahern u32 mtu = 0; 135650d889b1SDavid Ahern 135750d889b1SDavid Ahern if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || 135850d889b1SDavid Ahern fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) 135950d889b1SDavid Ahern mtu = fi->fib_mtu; 136050d889b1SDavid Ahern 136150d889b1SDavid Ahern if (likely(!mtu)) { 136250d889b1SDavid Ahern struct fib_nh_exception *fnhe; 136350d889b1SDavid Ahern 136450d889b1SDavid Ahern fnhe = find_exception(nh, daddr); 136550d889b1SDavid Ahern if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) 136650d889b1SDavid Ahern mtu = fnhe->fnhe_pmtu; 136750d889b1SDavid Ahern } 136850d889b1SDavid Ahern 136950d889b1SDavid Ahern if (likely(!mtu)) 137050d889b1SDavid Ahern mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); 137150d889b1SDavid Ahern 137250d889b1SDavid Ahern return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu); 137350d889b1SDavid Ahern } 137450d889b1SDavid Ahern 1375caacf05eSDavid S. Miller static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, 1376a4c2fd7fSWei Wang __be32 daddr, const bool do_cache) 1377f2bb4bedSDavid S. Miller { 1378caacf05eSDavid S. Miller bool ret = false; 1379caacf05eSDavid S. Miller 1380c5038a83SDavid S. Miller spin_lock_bh(&fnhe_lock); 1381aee06da6SJulian Anastasov 1382c5038a83SDavid S. Miller if (daddr == fnhe->fnhe_daddr) { 13832ffae99dSTimo Teräs struct rtable __rcu **porig; 13842ffae99dSTimo Teräs struct rtable *orig; 13855aad1de5STimo Teräs int genid = fnhe_genid(dev_net(rt->dst.dev)); 13862ffae99dSTimo Teräs 13872ffae99dSTimo Teräs if (rt_is_input_route(rt)) 13882ffae99dSTimo Teräs porig = &fnhe->fnhe_rth_input; 13892ffae99dSTimo Teräs else 13902ffae99dSTimo Teräs porig = &fnhe->fnhe_rth_output; 13912ffae99dSTimo Teräs orig = rcu_dereference(*porig); 13925aad1de5STimo Teräs 13935aad1de5STimo Teräs if (fnhe->fnhe_genid != genid) { 13945aad1de5STimo Teräs fnhe->fnhe_genid = genid; 139513d82bf5SSteffen Klassert fnhe->fnhe_gw = 0; 139613d82bf5SSteffen Klassert fnhe->fnhe_pmtu = 0; 139713d82bf5SSteffen Klassert fnhe->fnhe_expires = 0; 13980e8411e4SHangbin Liu fnhe->fnhe_mtu_locked = false; 13992ffae99dSTimo Teräs fnhe_flush_routes(fnhe); 14002ffae99dSTimo Teräs orig = NULL; 140113d82bf5SSteffen Klassert } 1402387aa65aSTimo Teräs fill_route_from_fnhe(rt, fnhe); 1403387aa65aSTimo Teräs if (!rt->rt_gateway) 1404155e8336SJulian Anastasov rt->rt_gateway = daddr; 1405f2bb4bedSDavid S. Miller 1406a4c2fd7fSWei Wang if (do_cache) { 14070830106cSWei Wang dst_hold(&rt->dst); 14082ffae99dSTimo Teräs rcu_assign_pointer(*porig, rt); 14090830106cSWei Wang if (orig) { 141095c47f9cSWei Wang dst_dev_put(&orig->dst); 14110830106cSWei Wang dst_release(&orig->dst); 14120830106cSWei Wang } 14132ffae99dSTimo Teräs ret = true; 14142ffae99dSTimo Teräs } 1415c5038a83SDavid S. Miller 1416c5038a83SDavid S. Miller fnhe->fnhe_stamp = jiffies; 1417c5038a83SDavid S. Miller } 1418c5038a83SDavid S. Miller spin_unlock_bh(&fnhe_lock); 1419caacf05eSDavid S. Miller 1420caacf05eSDavid S. Miller return ret; 142154764bb6SEric Dumazet } 142254764bb6SEric Dumazet 1423caacf05eSDavid S. Miller static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) 1424f2bb4bedSDavid S. Miller { 1425d26b3a7cSEric Dumazet struct rtable *orig, *prev, **p; 1426caacf05eSDavid S. Miller bool ret = true; 1427f2bb4bedSDavid S. Miller 1428d26b3a7cSEric Dumazet if (rt_is_input_route(rt)) { 142954764bb6SEric Dumazet p = (struct rtable **)&nh->nh_rth_input; 1430d26b3a7cSEric Dumazet } else { 1431903ceff7SChristoph Lameter p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output); 1432d26b3a7cSEric Dumazet } 1433f2bb4bedSDavid S. Miller orig = *p; 1434f2bb4bedSDavid S. Miller 14350830106cSWei Wang /* hold dst before doing cmpxchg() to avoid race condition 14360830106cSWei Wang * on this dst 14370830106cSWei Wang */ 14380830106cSWei Wang dst_hold(&rt->dst); 1439f2bb4bedSDavid S. Miller prev = cmpxchg(p, orig, rt); 1440f2bb4bedSDavid S. Miller if (prev == orig) { 14410830106cSWei Wang if (orig) { 144295c47f9cSWei Wang dst_dev_put(&orig->dst); 14430830106cSWei Wang dst_release(&orig->dst); 14440830106cSWei Wang } 14450830106cSWei Wang } else { 14460830106cSWei Wang dst_release(&rt->dst); 1447caacf05eSDavid S. Miller ret = false; 14480830106cSWei Wang } 1449caacf05eSDavid S. Miller 1450caacf05eSDavid S. Miller return ret; 1451caacf05eSDavid S. Miller } 1452caacf05eSDavid S. Miller 14535055c371SEric Dumazet struct uncached_list { 14545055c371SEric Dumazet spinlock_t lock; 14555055c371SEric Dumazet struct list_head head; 14565055c371SEric Dumazet }; 14575055c371SEric Dumazet 14585055c371SEric Dumazet static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); 1459caacf05eSDavid S. Miller 1460510c321bSXin Long void rt_add_uncached_list(struct rtable *rt) 1461caacf05eSDavid S. Miller { 14625055c371SEric Dumazet struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); 14635055c371SEric Dumazet 14645055c371SEric Dumazet rt->rt_uncached_list = ul; 14655055c371SEric Dumazet 14665055c371SEric Dumazet spin_lock_bh(&ul->lock); 14675055c371SEric Dumazet list_add_tail(&rt->rt_uncached, &ul->head); 14685055c371SEric Dumazet spin_unlock_bh(&ul->lock); 1469caacf05eSDavid S. Miller } 1470caacf05eSDavid S. Miller 1471510c321bSXin Long void rt_del_uncached_list(struct rtable *rt) 1472510c321bSXin Long { 1473510c321bSXin Long if (!list_empty(&rt->rt_uncached)) { 1474510c321bSXin Long struct uncached_list *ul = rt->rt_uncached_list; 1475510c321bSXin Long 1476510c321bSXin Long spin_lock_bh(&ul->lock); 1477510c321bSXin Long list_del(&rt->rt_uncached); 1478510c321bSXin Long spin_unlock_bh(&ul->lock); 1479510c321bSXin Long } 1480510c321bSXin Long } 1481510c321bSXin Long 1482caacf05eSDavid S. Miller static void ipv4_dst_destroy(struct dst_entry *dst) 1483caacf05eSDavid S. Miller { 14843fb07dafSEric Dumazet struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); 1485caacf05eSDavid S. Miller struct rtable *rt = (struct rtable *)dst; 1486caacf05eSDavid S. Miller 14879620fef2SEric Dumazet if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) 14883fb07dafSEric Dumazet kfree(p); 14893fb07dafSEric Dumazet 1490510c321bSXin Long rt_del_uncached_list(rt); 1491caacf05eSDavid S. Miller } 1492caacf05eSDavid S. Miller 1493caacf05eSDavid S. Miller void rt_flush_dev(struct net_device *dev) 1494caacf05eSDavid S. Miller { 1495caacf05eSDavid S. Miller struct net *net = dev_net(dev); 1496caacf05eSDavid S. Miller struct rtable *rt; 14975055c371SEric Dumazet int cpu; 1498caacf05eSDavid S. Miller 14995055c371SEric Dumazet for_each_possible_cpu(cpu) { 15005055c371SEric Dumazet struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); 15015055c371SEric Dumazet 15025055c371SEric Dumazet spin_lock_bh(&ul->lock); 15035055c371SEric Dumazet list_for_each_entry(rt, &ul->head, rt_uncached) { 1504caacf05eSDavid S. Miller if (rt->dst.dev != dev) 1505caacf05eSDavid S. Miller continue; 1506caacf05eSDavid S. Miller rt->dst.dev = net->loopback_dev; 1507caacf05eSDavid S. Miller dev_hold(rt->dst.dev); 1508caacf05eSDavid S. Miller dev_put(dev); 1509caacf05eSDavid S. Miller } 15105055c371SEric Dumazet spin_unlock_bh(&ul->lock); 15114895c771SDavid S. Miller } 15124895c771SDavid S. Miller } 15134895c771SDavid S. Miller 15144331debcSEric Dumazet static bool rt_cache_valid(const struct rtable *rt) 1515d2d68ba9SDavid S. Miller { 15164331debcSEric Dumazet return rt && 15174331debcSEric Dumazet rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 15184331debcSEric Dumazet !rt_is_expired(rt); 1519d2d68ba9SDavid S. Miller } 1520d2d68ba9SDavid S. Miller 1521f2bb4bedSDavid S. Miller static void rt_set_nexthop(struct rtable *rt, __be32 daddr, 15225e2b61f7SDavid S. Miller const struct fib_result *res, 1523f2bb4bedSDavid S. Miller struct fib_nh_exception *fnhe, 1524a4c2fd7fSWei Wang struct fib_info *fi, u16 type, u32 itag, 1525a4c2fd7fSWei Wang const bool do_cache) 15261da177e4SLinus Torvalds { 1527caacf05eSDavid S. Miller bool cached = false; 1528caacf05eSDavid S. Miller 15291da177e4SLinus Torvalds if (fi) { 15304895c771SDavid S. Miller struct fib_nh *nh = &FIB_RES_NH(*res); 15314895c771SDavid S. Miller 1532155e8336SJulian Anastasov if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { 15334895c771SDavid S. Miller rt->rt_gateway = nh->nh_gw; 1534155e8336SJulian Anastasov rt->rt_uses_gateway = 1; 1535155e8336SJulian Anastasov } 15363fb07dafSEric Dumazet dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); 15373fb07dafSEric Dumazet if (fi->fib_metrics != &dst_default_metrics) { 15383fb07dafSEric Dumazet rt->dst._metrics |= DST_METRICS_REFCOUNTED; 15399620fef2SEric Dumazet refcount_inc(&fi->fib_metrics->refcnt); 15403fb07dafSEric Dumazet } 1541c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1542f2bb4bedSDavid S. Miller rt->dst.tclassid = nh->nh_tclassid; 15431da177e4SLinus Torvalds #endif 154461adedf3SJiri Benc rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); 1545c5038a83SDavid S. Miller if (unlikely(fnhe)) 1546a4c2fd7fSWei Wang cached = rt_bind_exception(rt, fnhe, daddr, do_cache); 1547a4c2fd7fSWei Wang else if (do_cache) 1548caacf05eSDavid S. Miller cached = rt_cache_route(nh, rt); 1549155e8336SJulian Anastasov if (unlikely(!cached)) { 1550155e8336SJulian Anastasov /* Routes we intend to cache in nexthop exception or 1551155e8336SJulian Anastasov * FIB nexthop have the DST_NOCACHE bit clear. 1552155e8336SJulian Anastasov * However, if we are unsuccessful at storing this 1553155e8336SJulian Anastasov * route into the cache we really need to set it. 1554155e8336SJulian Anastasov */ 1555155e8336SJulian Anastasov if (!rt->rt_gateway) 1556155e8336SJulian Anastasov rt->rt_gateway = daddr; 1557155e8336SJulian Anastasov rt_add_uncached_list(rt); 1558d33e4553SDavid S. Miller } 1559155e8336SJulian Anastasov } else 1560caacf05eSDavid S. Miller rt_add_uncached_list(rt); 15611da177e4SLinus Torvalds 1562c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 15631da177e4SLinus Torvalds #ifdef CONFIG_IP_MULTIPLE_TABLES 156485b91b03SDavid S. Miller set_class_tag(rt, res->tclassid); 15651da177e4SLinus Torvalds #endif 15661da177e4SLinus Torvalds set_class_tag(rt, itag); 15671da177e4SLinus Torvalds #endif 15681da177e4SLinus Torvalds } 15691da177e4SLinus Torvalds 15709ab179d8SDavid Ahern struct rtable *rt_dst_alloc(struct net_device *dev, 1571d08c4f35SDavid Ahern unsigned int flags, u16 type, 1572f2bb4bedSDavid S. Miller bool nopolicy, bool noxfrm, bool will_cache) 15730c4dcd58SDavid S. Miller { 1574d08c4f35SDavid Ahern struct rtable *rt; 1575d08c4f35SDavid Ahern 1576d08c4f35SDavid Ahern rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, 1577a4c2fd7fSWei Wang (will_cache ? 0 : DST_HOST) | 15780c4dcd58SDavid S. Miller (nopolicy ? DST_NOPOLICY : 0) | 1579b2a9c0edSWei Wang (noxfrm ? DST_NOXFRM : 0)); 1580d08c4f35SDavid Ahern 1581d08c4f35SDavid Ahern if (rt) { 1582d08c4f35SDavid Ahern rt->rt_genid = rt_genid_ipv4(dev_net(dev)); 1583d08c4f35SDavid Ahern rt->rt_flags = flags; 1584d08c4f35SDavid Ahern rt->rt_type = type; 1585d08c4f35SDavid Ahern rt->rt_is_input = 0; 1586d08c4f35SDavid Ahern rt->rt_iif = 0; 1587d08c4f35SDavid Ahern rt->rt_pmtu = 0; 1588d52e5a7eSSabrina Dubroca rt->rt_mtu_locked = 0; 1589d08c4f35SDavid Ahern rt->rt_gateway = 0; 1590d08c4f35SDavid Ahern rt->rt_uses_gateway = 0; 1591d08c4f35SDavid Ahern INIT_LIST_HEAD(&rt->rt_uncached); 1592d08c4f35SDavid Ahern 1593d08c4f35SDavid Ahern rt->dst.output = ip_output; 1594d08c4f35SDavid Ahern if (flags & RTCF_LOCAL) 1595d08c4f35SDavid Ahern rt->dst.input = ip_local_deliver; 1596d08c4f35SDavid Ahern } 1597d08c4f35SDavid Ahern 1598d08c4f35SDavid Ahern return rt; 15990c4dcd58SDavid S. Miller } 16009ab179d8SDavid Ahern EXPORT_SYMBOL(rt_dst_alloc); 16010c4dcd58SDavid S. Miller 160296d36220SEric Dumazet /* called in rcu_read_lock() section */ 1603bc044e8dSPaolo Abeni int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1604bc044e8dSPaolo Abeni u8 tos, struct net_device *dev, 1605bc044e8dSPaolo Abeni struct in_device *in_dev, u32 *itag) 16061da177e4SLinus Torvalds { 1607b5f7e755SEric Dumazet int err; 16081da177e4SLinus Torvalds 16091da177e4SLinus Torvalds /* Primary sanity checks. */ 161051456b29SIan Morris if (!in_dev) 16111da177e4SLinus Torvalds return -EINVAL; 16121da177e4SLinus Torvalds 16131e637c74SJan Engelhardt if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 1614d0daebc3SThomas Graf skb->protocol != htons(ETH_P_IP)) 1615bc044e8dSPaolo Abeni return -EINVAL; 1616d0daebc3SThomas Graf 161775fea73dSAlexander Duyck if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) 1618bc044e8dSPaolo Abeni return -EINVAL; 16191da177e4SLinus Torvalds 1620f97c1e0cSJoe Perches if (ipv4_is_zeronet(saddr)) { 1621f97c1e0cSJoe Perches if (!ipv4_is_local_multicast(daddr)) 1622bc044e8dSPaolo Abeni return -EINVAL; 1623b5f7e755SEric Dumazet } else { 16249e56e380SDavid S. Miller err = fib_validate_source(skb, saddr, 0, tos, 0, dev, 1625bc044e8dSPaolo Abeni in_dev, itag); 1626b5f7e755SEric Dumazet if (err < 0) 1627bc044e8dSPaolo Abeni return err; 1628b5f7e755SEric Dumazet } 1629bc044e8dSPaolo Abeni return 0; 1630bc044e8dSPaolo Abeni } 1631bc044e8dSPaolo Abeni 1632bc044e8dSPaolo Abeni /* called in rcu_read_lock() section */ 1633bc044e8dSPaolo Abeni static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1634bc044e8dSPaolo Abeni u8 tos, struct net_device *dev, int our) 1635bc044e8dSPaolo Abeni { 1636bc044e8dSPaolo Abeni struct in_device *in_dev = __in_dev_get_rcu(dev); 1637bc044e8dSPaolo Abeni unsigned int flags = RTCF_MULTICAST; 1638bc044e8dSPaolo Abeni struct rtable *rth; 1639bc044e8dSPaolo Abeni u32 itag = 0; 1640bc044e8dSPaolo Abeni int err; 1641bc044e8dSPaolo Abeni 1642bc044e8dSPaolo Abeni err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); 1643bc044e8dSPaolo Abeni if (err) 1644bc044e8dSPaolo Abeni return err; 1645bc044e8dSPaolo Abeni 1646d08c4f35SDavid Ahern if (our) 1647d08c4f35SDavid Ahern flags |= RTCF_LOCAL; 1648d08c4f35SDavid Ahern 1649d08c4f35SDavid Ahern rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, 1650f2bb4bedSDavid S. Miller IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); 16511da177e4SLinus Torvalds if (!rth) 1652bc044e8dSPaolo Abeni return -ENOBUFS; 16531da177e4SLinus Torvalds 1654c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1655d8d1f30bSChangli Gao rth->dst.tclassid = itag; 16561da177e4SLinus Torvalds #endif 1657cf911662SDavid S. Miller rth->dst.output = ip_rt_bug; 16589917e1e8SDavid S. Miller rth->rt_is_input= 1; 16591da177e4SLinus Torvalds 16601da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE 1661f97c1e0cSJoe Perches if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) 1662d8d1f30bSChangli Gao rth->dst.input = ip_mr_input; 16631da177e4SLinus Torvalds #endif 16641da177e4SLinus Torvalds RT_CACHE_STAT_INC(in_slow_mc); 16651da177e4SLinus Torvalds 166689aef892SDavid S. Miller skb_dst_set(skb, &rth->dst); 166789aef892SDavid S. Miller return 0; 16681da177e4SLinus Torvalds } 16691da177e4SLinus Torvalds 16701da177e4SLinus Torvalds 16711da177e4SLinus Torvalds static void ip_handle_martian_source(struct net_device *dev, 16721da177e4SLinus Torvalds struct in_device *in_dev, 16731da177e4SLinus Torvalds struct sk_buff *skb, 16749e12bb22SAl Viro __be32 daddr, 16759e12bb22SAl Viro __be32 saddr) 16761da177e4SLinus Torvalds { 16771da177e4SLinus Torvalds RT_CACHE_STAT_INC(in_martian_src); 16781da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE 16791da177e4SLinus Torvalds if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { 16801da177e4SLinus Torvalds /* 16811da177e4SLinus Torvalds * RFC1812 recommendation, if source is martian, 16821da177e4SLinus Torvalds * the only hint is MAC header. 16831da177e4SLinus Torvalds */ 1684058bd4d2SJoe Perches pr_warn("martian source %pI4 from %pI4, on dev %s\n", 1685673d57e7SHarvey Harrison &daddr, &saddr, dev->name); 168698e399f8SArnaldo Carvalho de Melo if (dev->hard_header_len && skb_mac_header_was_set(skb)) { 1687058bd4d2SJoe Perches print_hex_dump(KERN_WARNING, "ll header: ", 1688058bd4d2SJoe Perches DUMP_PREFIX_OFFSET, 16, 1, 1689058bd4d2SJoe Perches skb_mac_header(skb), 1690058bd4d2SJoe Perches dev->hard_header_len, true); 16911da177e4SLinus Torvalds } 16921da177e4SLinus Torvalds } 16931da177e4SLinus Torvalds #endif 16941da177e4SLinus Torvalds } 16951da177e4SLinus Torvalds 169647360228SEric Dumazet /* called in rcu_read_lock() section */ 16975969f71dSStephen Hemminger static int __mkroute_input(struct sk_buff *skb, 1698982721f3SDavid S. Miller const struct fib_result *res, 16991da177e4SLinus Torvalds struct in_device *in_dev, 1700c6cffba4SDavid S. Miller __be32 daddr, __be32 saddr, u32 tos) 17011da177e4SLinus Torvalds { 17022ffae99dSTimo Teräs struct fib_nh_exception *fnhe; 17031da177e4SLinus Torvalds struct rtable *rth; 17041da177e4SLinus Torvalds int err; 17051da177e4SLinus Torvalds struct in_device *out_dev; 1706d2d68ba9SDavid S. Miller bool do_cache; 1707fbdc0ad0SLi RongQing u32 itag = 0; 17081da177e4SLinus Torvalds 17091da177e4SLinus Torvalds /* get a working reference to the output device */ 171047360228SEric Dumazet out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); 171151456b29SIan Morris if (!out_dev) { 1712e87cc472SJoe Perches net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); 17131da177e4SLinus Torvalds return -EINVAL; 17141da177e4SLinus Torvalds } 17151da177e4SLinus Torvalds 17165c04c819SMichael Smith err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), 17179e56e380SDavid S. Miller in_dev->dev, in_dev, &itag); 17181da177e4SLinus Torvalds if (err < 0) { 17191da177e4SLinus Torvalds ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 17201da177e4SLinus Torvalds saddr); 17211da177e4SLinus Torvalds 17221da177e4SLinus Torvalds goto cleanup; 17231da177e4SLinus Torvalds } 17241da177e4SLinus Torvalds 1725e81da0e1SJulian Anastasov do_cache = res->fi && !itag; 1726e81da0e1SJulian Anastasov if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && 1727df4d9254SHannes Frederic Sowa skb->protocol == htons(ETH_P_IP) && 17281da177e4SLinus Torvalds (IN_DEV_SHARED_MEDIA(out_dev) || 1729df4d9254SHannes Frederic Sowa inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) 1730df4d9254SHannes Frederic Sowa IPCB(skb)->flags |= IPSKB_DOREDIRECT; 17311da177e4SLinus Torvalds 17321da177e4SLinus Torvalds if (skb->protocol != htons(ETH_P_IP)) { 17331da177e4SLinus Torvalds /* Not IP (i.e. ARP). Do not create route, if it is 17341da177e4SLinus Torvalds * invalid for proxy arp. DNAT routes are always valid. 173565324144SJesper Dangaard Brouer * 173665324144SJesper Dangaard Brouer * Proxy arp feature have been extended to allow, ARP 173765324144SJesper Dangaard Brouer * replies back to the same interface, to support 173865324144SJesper Dangaard Brouer * Private VLAN switch technologies. See arp.c. 17391da177e4SLinus Torvalds */ 174065324144SJesper Dangaard Brouer if (out_dev == in_dev && 174165324144SJesper Dangaard Brouer IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { 17421da177e4SLinus Torvalds err = -EINVAL; 17431da177e4SLinus Torvalds goto cleanup; 17441da177e4SLinus Torvalds } 17451da177e4SLinus Torvalds } 17461da177e4SLinus Torvalds 17472ffae99dSTimo Teräs fnhe = find_exception(&FIB_RES_NH(*res), daddr); 1748e81da0e1SJulian Anastasov if (do_cache) { 174994720e3aSJulian Anastasov if (fnhe) 17502ffae99dSTimo Teräs rth = rcu_dereference(fnhe->fnhe_rth_input); 175194720e3aSJulian Anastasov else 175254764bb6SEric Dumazet rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); 1753d2d68ba9SDavid S. Miller if (rt_cache_valid(rth)) { 1754c6cffba4SDavid S. Miller skb_dst_set_noref(skb, &rth->dst); 1755d2d68ba9SDavid S. Miller goto out; 1756d2d68ba9SDavid S. Miller } 1757d2d68ba9SDavid S. Miller } 1758f2bb4bedSDavid S. Miller 1759d08c4f35SDavid Ahern rth = rt_dst_alloc(out_dev->dev, 0, res->type, 17605c1e6aa3SDavid S. Miller IN_DEV_CONF_GET(in_dev, NOPOLICY), 1761d2d68ba9SDavid S. Miller IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); 17621da177e4SLinus Torvalds if (!rth) { 17631da177e4SLinus Torvalds err = -ENOBUFS; 17641da177e4SLinus Torvalds goto cleanup; 17651da177e4SLinus Torvalds } 17661da177e4SLinus Torvalds 17679917e1e8SDavid S. Miller rth->rt_is_input = 1; 1768a6254864SDuan Jiong RT_CACHE_STAT_INC(in_slow_tot); 17691da177e4SLinus Torvalds 1770d8d1f30bSChangli Gao rth->dst.input = ip_forward; 17711da177e4SLinus Torvalds 1772a4c2fd7fSWei Wang rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag, 1773a4c2fd7fSWei Wang do_cache); 17749942895bSDavid Ahern lwtunnel_set_redirect(&rth->dst); 1775c6cffba4SDavid S. Miller skb_dst_set(skb, &rth->dst); 1776d2d68ba9SDavid S. Miller out: 17771da177e4SLinus Torvalds err = 0; 17781da177e4SLinus Torvalds cleanup: 17791da177e4SLinus Torvalds return err; 17801da177e4SLinus Torvalds } 17811da177e4SLinus Torvalds 178279a13159SPeter Nørlund #ifdef CONFIG_IP_ROUTE_MULTIPATH 178379a13159SPeter Nørlund /* To make ICMP packets follow the right flow, the multipath hash is 1784bf4e0a3dSNikolay Aleksandrov * calculated from the inner IP addresses. 178579a13159SPeter Nørlund */ 1786bf4e0a3dSNikolay Aleksandrov static void ip_multipath_l3_keys(const struct sk_buff *skb, 1787bf4e0a3dSNikolay Aleksandrov struct flow_keys *hash_keys) 178879a13159SPeter Nørlund { 178979a13159SPeter Nørlund const struct iphdr *outer_iph = ip_hdr(skb); 17906f74b6c2SDavid Ahern const struct iphdr *key_iph = outer_iph; 1791bf4e0a3dSNikolay Aleksandrov const struct iphdr *inner_iph; 179279a13159SPeter Nørlund const struct icmphdr *icmph; 179379a13159SPeter Nørlund struct iphdr _inner_iph; 1794bf4e0a3dSNikolay Aleksandrov struct icmphdr _icmph; 1795bf4e0a3dSNikolay Aleksandrov 1796bf4e0a3dSNikolay Aleksandrov if (likely(outer_iph->protocol != IPPROTO_ICMP)) 17976f74b6c2SDavid Ahern goto out; 179879a13159SPeter Nørlund 179979a13159SPeter Nørlund if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) 18006f74b6c2SDavid Ahern goto out; 180179a13159SPeter Nørlund 180279a13159SPeter Nørlund icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), 180379a13159SPeter Nørlund &_icmph); 180479a13159SPeter Nørlund if (!icmph) 18056f74b6c2SDavid Ahern goto out; 180679a13159SPeter Nørlund 180779a13159SPeter Nørlund if (icmph->type != ICMP_DEST_UNREACH && 180879a13159SPeter Nørlund icmph->type != ICMP_REDIRECT && 180979a13159SPeter Nørlund icmph->type != ICMP_TIME_EXCEEDED && 1810bf4e0a3dSNikolay Aleksandrov icmph->type != ICMP_PARAMETERPROB) 18116f74b6c2SDavid Ahern goto out; 181279a13159SPeter Nørlund 181379a13159SPeter Nørlund inner_iph = skb_header_pointer(skb, 181479a13159SPeter Nørlund outer_iph->ihl * 4 + sizeof(_icmph), 181579a13159SPeter Nørlund sizeof(_inner_iph), &_inner_iph); 181679a13159SPeter Nørlund if (!inner_iph) 18176f74b6c2SDavid Ahern goto out; 18186f74b6c2SDavid Ahern 18196f74b6c2SDavid Ahern key_iph = inner_iph; 18206f74b6c2SDavid Ahern out: 18216f74b6c2SDavid Ahern hash_keys->addrs.v4addrs.src = key_iph->saddr; 18226f74b6c2SDavid Ahern hash_keys->addrs.v4addrs.dst = key_iph->daddr; 182379a13159SPeter Nørlund } 182479a13159SPeter Nørlund 1825bf4e0a3dSNikolay Aleksandrov /* if skb is set it will be used and fl4 can be NULL */ 18267efc0b6bSDavid Ahern int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, 1827e37b1e97SRoopa Prabhu const struct sk_buff *skb, struct flow_keys *flkeys) 1828bf4e0a3dSNikolay Aleksandrov { 1829bf4e0a3dSNikolay Aleksandrov struct flow_keys hash_keys; 1830bf4e0a3dSNikolay Aleksandrov u32 mhash; 1831bf4e0a3dSNikolay Aleksandrov 1832bf4e0a3dSNikolay Aleksandrov switch (net->ipv4.sysctl_fib_multipath_hash_policy) { 1833bf4e0a3dSNikolay Aleksandrov case 0: 1834bf4e0a3dSNikolay Aleksandrov memset(&hash_keys, 0, sizeof(hash_keys)); 1835bf4e0a3dSNikolay Aleksandrov hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1836bf4e0a3dSNikolay Aleksandrov if (skb) { 1837bf4e0a3dSNikolay Aleksandrov ip_multipath_l3_keys(skb, &hash_keys); 1838bf4e0a3dSNikolay Aleksandrov } else { 1839bf4e0a3dSNikolay Aleksandrov hash_keys.addrs.v4addrs.src = fl4->saddr; 1840bf4e0a3dSNikolay Aleksandrov hash_keys.addrs.v4addrs.dst = fl4->daddr; 1841bf4e0a3dSNikolay Aleksandrov } 1842bf4e0a3dSNikolay Aleksandrov break; 1843bf4e0a3dSNikolay Aleksandrov case 1: 1844bf4e0a3dSNikolay Aleksandrov /* skb is currently provided only when forwarding */ 1845bf4e0a3dSNikolay Aleksandrov if (skb) { 1846bf4e0a3dSNikolay Aleksandrov unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; 1847bf4e0a3dSNikolay Aleksandrov struct flow_keys keys; 1848bf4e0a3dSNikolay Aleksandrov 1849bf4e0a3dSNikolay Aleksandrov /* short-circuit if we already have L4 hash present */ 1850bf4e0a3dSNikolay Aleksandrov if (skb->l4_hash) 1851bf4e0a3dSNikolay Aleksandrov return skb_get_hash_raw(skb) >> 1; 1852ec7127a5SDavid Ahern 1853bf4e0a3dSNikolay Aleksandrov memset(&hash_keys, 0, sizeof(hash_keys)); 18541fe4b118SDavid Ahern 1855ec7127a5SDavid Ahern if (!flkeys) { 1856ec7127a5SDavid Ahern skb_flow_dissect_flow_keys(skb, &keys, flag); 1857ec7127a5SDavid Ahern flkeys = &keys; 1858ec7127a5SDavid Ahern } 1859ec7127a5SDavid Ahern 1860e37b1e97SRoopa Prabhu hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1861e37b1e97SRoopa Prabhu hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; 1862e37b1e97SRoopa Prabhu hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; 1863e37b1e97SRoopa Prabhu hash_keys.ports.src = flkeys->ports.src; 1864e37b1e97SRoopa Prabhu hash_keys.ports.dst = flkeys->ports.dst; 1865e37b1e97SRoopa Prabhu hash_keys.basic.ip_proto = flkeys->basic.ip_proto; 1866e37b1e97SRoopa Prabhu } else { 1867bf4e0a3dSNikolay Aleksandrov memset(&hash_keys, 0, sizeof(hash_keys)); 1868bf4e0a3dSNikolay Aleksandrov hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1869bf4e0a3dSNikolay Aleksandrov hash_keys.addrs.v4addrs.src = fl4->saddr; 1870bf4e0a3dSNikolay Aleksandrov hash_keys.addrs.v4addrs.dst = fl4->daddr; 1871bf4e0a3dSNikolay Aleksandrov hash_keys.ports.src = fl4->fl4_sport; 1872bf4e0a3dSNikolay Aleksandrov hash_keys.ports.dst = fl4->fl4_dport; 1873bf4e0a3dSNikolay Aleksandrov hash_keys.basic.ip_proto = fl4->flowi4_proto; 1874bf4e0a3dSNikolay Aleksandrov } 1875bf4e0a3dSNikolay Aleksandrov break; 1876bf4e0a3dSNikolay Aleksandrov } 1877bf4e0a3dSNikolay Aleksandrov mhash = flow_hash_from_keys(&hash_keys); 1878bf4e0a3dSNikolay Aleksandrov 1879bf4e0a3dSNikolay Aleksandrov return mhash >> 1; 1880bf4e0a3dSNikolay Aleksandrov } 188179a13159SPeter Nørlund #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 188279a13159SPeter Nørlund 18835969f71dSStephen Hemminger static int ip_mkroute_input(struct sk_buff *skb, 18841da177e4SLinus Torvalds struct fib_result *res, 18851da177e4SLinus Torvalds struct in_device *in_dev, 1886e37b1e97SRoopa Prabhu __be32 daddr, __be32 saddr, u32 tos, 1887e37b1e97SRoopa Prabhu struct flow_keys *hkeys) 18881da177e4SLinus Torvalds { 18891da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 18900e884c78SPeter Nørlund if (res->fi && res->fi->fib_nhs > 1) { 18917efc0b6bSDavid Ahern int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); 18920e884c78SPeter Nørlund 18930e884c78SPeter Nørlund fib_select_multipath(res, h); 18940e884c78SPeter Nørlund } 18951da177e4SLinus Torvalds #endif 18961da177e4SLinus Torvalds 18971da177e4SLinus Torvalds /* create a routing cache entry */ 1898c6cffba4SDavid S. Miller return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); 18991da177e4SLinus Torvalds } 19001da177e4SLinus Torvalds 19011da177e4SLinus Torvalds /* 19021da177e4SLinus Torvalds * NOTE. We drop all the packets that has local source 19031da177e4SLinus Torvalds * addresses, because every properly looped back packet 19041da177e4SLinus Torvalds * must have correct destination already attached by output routine. 19051da177e4SLinus Torvalds * 19061da177e4SLinus Torvalds * Such approach solves two big problems: 19071da177e4SLinus Torvalds * 1. Not simplex devices are handled properly. 19081da177e4SLinus Torvalds * 2. IP spoofing attempts are filtered with 100% of guarantee. 1909ebc0ffaeSEric Dumazet * called with rcu_read_lock() 19101da177e4SLinus Torvalds */ 19111da177e4SLinus Torvalds 19129e12bb22SAl Viro static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, 19135510cdf7SDavid Ahern u8 tos, struct net_device *dev, 19145510cdf7SDavid Ahern struct fib_result *res) 19151da177e4SLinus Torvalds { 191696d36220SEric Dumazet struct in_device *in_dev = __in_dev_get_rcu(dev); 1917e37b1e97SRoopa Prabhu struct flow_keys *flkeys = NULL, _flkeys; 1918e37b1e97SRoopa Prabhu struct net *net = dev_net(dev); 19191b7179d3SThomas Graf struct ip_tunnel_info *tun_info; 1920e37b1e97SRoopa Prabhu int err = -EINVAL; 192195c96174SEric Dumazet unsigned int flags = 0; 19221da177e4SLinus Torvalds u32 itag = 0; 19231da177e4SLinus Torvalds struct rtable *rth; 1924e37b1e97SRoopa Prabhu struct flowi4 fl4; 1925d2d68ba9SDavid S. Miller bool do_cache; 19261da177e4SLinus Torvalds 19271da177e4SLinus Torvalds /* IP on this device is disabled. */ 19281da177e4SLinus Torvalds 19291da177e4SLinus Torvalds if (!in_dev) 19301da177e4SLinus Torvalds goto out; 19311da177e4SLinus Torvalds 19321da177e4SLinus Torvalds /* Check for the most weird martians, which can be not detected 19331da177e4SLinus Torvalds by fib_lookup. 19341da177e4SLinus Torvalds */ 19351da177e4SLinus Torvalds 193661adedf3SJiri Benc tun_info = skb_tunnel_info(skb); 193746fa062aSJiri Benc if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) 19381b7179d3SThomas Graf fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id; 19391b7179d3SThomas Graf else 19401b7179d3SThomas Graf fl4.flowi4_tun_key.tun_id = 0; 1941f38a9eb1SThomas Graf skb_dst_drop(skb); 1942f38a9eb1SThomas Graf 1943d0daebc3SThomas Graf if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) 19441da177e4SLinus Torvalds goto martian_source; 19451da177e4SLinus Torvalds 19465510cdf7SDavid Ahern res->fi = NULL; 19475510cdf7SDavid Ahern res->table = NULL; 194827a954bdSAndy Walls if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) 19491da177e4SLinus Torvalds goto brd_input; 19501da177e4SLinus Torvalds 19511da177e4SLinus Torvalds /* Accept zero addresses only to limited broadcast; 19521da177e4SLinus Torvalds * I even do not know to fix it or not. Waiting for complains :-) 19531da177e4SLinus Torvalds */ 1954f97c1e0cSJoe Perches if (ipv4_is_zeronet(saddr)) 19551da177e4SLinus Torvalds goto martian_source; 19561da177e4SLinus Torvalds 1957d0daebc3SThomas Graf if (ipv4_is_zeronet(daddr)) 19581da177e4SLinus Torvalds goto martian_destination; 19591da177e4SLinus Torvalds 19609eb43e76SEric Dumazet /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), 19619eb43e76SEric Dumazet * and call it once if daddr or/and saddr are loopback addresses 19629eb43e76SEric Dumazet */ 19639eb43e76SEric Dumazet if (ipv4_is_loopback(daddr)) { 19649eb43e76SEric Dumazet if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) 1965d0daebc3SThomas Graf goto martian_destination; 19669eb43e76SEric Dumazet } else if (ipv4_is_loopback(saddr)) { 19679eb43e76SEric Dumazet if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) 1968d0daebc3SThomas Graf goto martian_source; 1969d0daebc3SThomas Graf } 1970d0daebc3SThomas Graf 19711da177e4SLinus Torvalds /* 19721da177e4SLinus Torvalds * Now we are ready to route packet. 19731da177e4SLinus Torvalds */ 197468a5e3ddSDavid S. Miller fl4.flowi4_oif = 0; 1975e0d56fddSDavid Ahern fl4.flowi4_iif = dev->ifindex; 197668a5e3ddSDavid S. Miller fl4.flowi4_mark = skb->mark; 197768a5e3ddSDavid S. Miller fl4.flowi4_tos = tos; 197868a5e3ddSDavid S. Miller fl4.flowi4_scope = RT_SCOPE_UNIVERSE; 1979b84f7878SDavid Ahern fl4.flowi4_flags = 0; 198068a5e3ddSDavid S. Miller fl4.daddr = daddr; 198168a5e3ddSDavid S. Miller fl4.saddr = saddr; 19828bcfd092SJulian Anastasov fl4.flowi4_uid = sock_net_uid(net, NULL); 1983e37b1e97SRoopa Prabhu 19845a847a6eSDavid Ahern if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { 1985e37b1e97SRoopa Prabhu flkeys = &_flkeys; 19865a847a6eSDavid Ahern } else { 19875a847a6eSDavid Ahern fl4.flowi4_proto = 0; 19885a847a6eSDavid Ahern fl4.fl4_sport = 0; 19895a847a6eSDavid Ahern fl4.fl4_dport = 0; 19905a847a6eSDavid Ahern } 1991e37b1e97SRoopa Prabhu 19925510cdf7SDavid Ahern err = fib_lookup(net, &fl4, res, 0); 1993cd0f0b95SDuan Jiong if (err != 0) { 1994cd0f0b95SDuan Jiong if (!IN_DEV_FORWARD(in_dev)) 1995cd0f0b95SDuan Jiong err = -EHOSTUNREACH; 19961da177e4SLinus Torvalds goto no_route; 1997cd0f0b95SDuan Jiong } 19981da177e4SLinus Torvalds 1999*5cbf777cSXin Long if (res->type == RTN_BROADCAST) { 2000*5cbf777cSXin Long if (IN_DEV_BFORWARD(in_dev)) 2001*5cbf777cSXin Long goto make_route; 20021da177e4SLinus Torvalds goto brd_input; 2003*5cbf777cSXin Long } 20041da177e4SLinus Torvalds 20055510cdf7SDavid Ahern if (res->type == RTN_LOCAL) { 20065c04c819SMichael Smith err = fib_validate_source(skb, saddr, daddr, tos, 20070d5edc68SCong Wang 0, dev, in_dev, &itag); 2008b5f7e755SEric Dumazet if (err < 0) 20090d753960SDavid Ahern goto martian_source; 20101da177e4SLinus Torvalds goto local_input; 20111da177e4SLinus Torvalds } 20121da177e4SLinus Torvalds 2013cd0f0b95SDuan Jiong if (!IN_DEV_FORWARD(in_dev)) { 2014cd0f0b95SDuan Jiong err = -EHOSTUNREACH; 2015251da413SDavid S. Miller goto no_route; 2016cd0f0b95SDuan Jiong } 20175510cdf7SDavid Ahern if (res->type != RTN_UNICAST) 20181da177e4SLinus Torvalds goto martian_destination; 20191da177e4SLinus Torvalds 2020*5cbf777cSXin Long make_route: 2021e37b1e97SRoopa Prabhu err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); 20221da177e4SLinus Torvalds out: return err; 20231da177e4SLinus Torvalds 20241da177e4SLinus Torvalds brd_input: 20251da177e4SLinus Torvalds if (skb->protocol != htons(ETH_P_IP)) 20261da177e4SLinus Torvalds goto e_inval; 20271da177e4SLinus Torvalds 202841347dcdSDavid S. Miller if (!ipv4_is_zeronet(saddr)) { 20299e56e380SDavid S. Miller err = fib_validate_source(skb, saddr, 0, tos, 0, dev, 20309e56e380SDavid S. Miller in_dev, &itag); 20311da177e4SLinus Torvalds if (err < 0) 20320d753960SDavid Ahern goto martian_source; 20331da177e4SLinus Torvalds } 20341da177e4SLinus Torvalds flags |= RTCF_BROADCAST; 20355510cdf7SDavid Ahern res->type = RTN_BROADCAST; 20361da177e4SLinus Torvalds RT_CACHE_STAT_INC(in_brd); 20371da177e4SLinus Torvalds 20381da177e4SLinus Torvalds local_input: 2039d2d68ba9SDavid S. Miller do_cache = false; 20405510cdf7SDavid Ahern if (res->fi) { 2041fe3edf45SDavid S. Miller if (!itag) { 20425510cdf7SDavid Ahern rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); 2043d2d68ba9SDavid S. Miller if (rt_cache_valid(rth)) { 2044c6cffba4SDavid S. Miller skb_dst_set_noref(skb, &rth->dst); 2045c6cffba4SDavid S. Miller err = 0; 2046c6cffba4SDavid S. Miller goto out; 2047d2d68ba9SDavid S. Miller } 2048d2d68ba9SDavid S. Miller do_cache = true; 2049d2d68ba9SDavid S. Miller } 2050d2d68ba9SDavid S. Miller } 2051d2d68ba9SDavid S. Miller 2052f5a0aab8SDavid Ahern rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, 20535510cdf7SDavid Ahern flags | RTCF_LOCAL, res->type, 2054d2d68ba9SDavid S. Miller IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); 20551da177e4SLinus Torvalds if (!rth) 20561da177e4SLinus Torvalds goto e_nobufs; 20571da177e4SLinus Torvalds 2058d8d1f30bSChangli Gao rth->dst.output= ip_rt_bug; 2059cf911662SDavid S. Miller #ifdef CONFIG_IP_ROUTE_CLASSID 2060cf911662SDavid S. Miller rth->dst.tclassid = itag; 2061cf911662SDavid S. Miller #endif 20629917e1e8SDavid S. Miller rth->rt_is_input = 1; 2063571e7226SRoopa Prabhu 2064a6254864SDuan Jiong RT_CACHE_STAT_INC(in_slow_tot); 20655510cdf7SDavid Ahern if (res->type == RTN_UNREACHABLE) { 2066d8d1f30bSChangli Gao rth->dst.input= ip_error; 2067d8d1f30bSChangli Gao rth->dst.error= -err; 20681da177e4SLinus Torvalds rth->rt_flags &= ~RTCF_LOCAL; 20691da177e4SLinus Torvalds } 2070efd85700SThomas Graf 2071dcdfdf56SAlexei Starovoitov if (do_cache) { 20725510cdf7SDavid Ahern struct fib_nh *nh = &FIB_RES_NH(*res); 2073efd85700SThomas Graf 2074efd85700SThomas Graf rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); 2075efd85700SThomas Graf if (lwtunnel_input_redirect(rth->dst.lwtstate)) { 2076efd85700SThomas Graf WARN_ON(rth->dst.input == lwtunnel_input); 2077efd85700SThomas Graf rth->dst.lwtstate->orig_input = rth->dst.input; 2078efd85700SThomas Graf rth->dst.input = lwtunnel_input; 2079efd85700SThomas Graf } 2080efd85700SThomas Graf 2081a4c2fd7fSWei Wang if (unlikely(!rt_cache_route(nh, rth))) 2082dcdfdf56SAlexei Starovoitov rt_add_uncached_list(rth); 2083dcdfdf56SAlexei Starovoitov } 208489aef892SDavid S. Miller skb_dst_set(skb, &rth->dst); 2085b23dd4feSDavid S. Miller err = 0; 2086ebc0ffaeSEric Dumazet goto out; 20871da177e4SLinus Torvalds 20881da177e4SLinus Torvalds no_route: 20891da177e4SLinus Torvalds RT_CACHE_STAT_INC(in_no_route); 20905510cdf7SDavid Ahern res->type = RTN_UNREACHABLE; 20915510cdf7SDavid Ahern res->fi = NULL; 20925510cdf7SDavid Ahern res->table = NULL; 20931da177e4SLinus Torvalds goto local_input; 20941da177e4SLinus Torvalds 20951da177e4SLinus Torvalds /* 20961da177e4SLinus Torvalds * Do not cache martian addresses: they should be logged (RFC1812) 20971da177e4SLinus Torvalds */ 20981da177e4SLinus Torvalds martian_destination: 20991da177e4SLinus Torvalds RT_CACHE_STAT_INC(in_martian_dst); 21001da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE 2101e87cc472SJoe Perches if (IN_DEV_LOG_MARTIANS(in_dev)) 2102e87cc472SJoe Perches net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", 2103673d57e7SHarvey Harrison &daddr, &saddr, dev->name); 21041da177e4SLinus Torvalds #endif 21052c2910a4SDietmar Eggemann 21061da177e4SLinus Torvalds e_inval: 21071da177e4SLinus Torvalds err = -EINVAL; 2108ebc0ffaeSEric Dumazet goto out; 21091da177e4SLinus Torvalds 21101da177e4SLinus Torvalds e_nobufs: 21111da177e4SLinus Torvalds err = -ENOBUFS; 2112ebc0ffaeSEric Dumazet goto out; 21131da177e4SLinus Torvalds 21141da177e4SLinus Torvalds martian_source: 21151da177e4SLinus Torvalds ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 2116ebc0ffaeSEric Dumazet goto out; 21171da177e4SLinus Torvalds } 21181da177e4SLinus Torvalds 2119c6cffba4SDavid S. Miller int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, 212038a424e4SDavid Miller u8 tos, struct net_device *dev) 21211da177e4SLinus Torvalds { 21225510cdf7SDavid Ahern struct fib_result res; 21235510cdf7SDavid Ahern int err; 21241da177e4SLinus Torvalds 21256e28099dSJulian Anastasov tos &= IPTOS_RT_MASK; 212696d36220SEric Dumazet rcu_read_lock(); 21275510cdf7SDavid Ahern err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res); 21285510cdf7SDavid Ahern rcu_read_unlock(); 212996d36220SEric Dumazet 21305510cdf7SDavid Ahern return err; 21315510cdf7SDavid Ahern } 21325510cdf7SDavid Ahern EXPORT_SYMBOL(ip_route_input_noref); 21335510cdf7SDavid Ahern 21345510cdf7SDavid Ahern /* called with rcu_read_lock held */ 21355510cdf7SDavid Ahern int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, 21365510cdf7SDavid Ahern u8 tos, struct net_device *dev, struct fib_result *res) 21375510cdf7SDavid Ahern { 21381da177e4SLinus Torvalds /* Multicast recognition logic is moved from route cache to here. 21391da177e4SLinus Torvalds The problem was that too many Ethernet cards have broken/missing 21401da177e4SLinus Torvalds hardware multicast filters :-( As result the host on multicasting 21411da177e4SLinus Torvalds network acquires a lot of useless route cache entries, sort of 21421da177e4SLinus Torvalds SDR messages from all the world. Now we try to get rid of them. 21431da177e4SLinus Torvalds Really, provided software IP multicast filter is organized 21441da177e4SLinus Torvalds reasonably (at least, hashed), it does not result in a slowdown 21451da177e4SLinus Torvalds comparing with route cache reject entries. 21461da177e4SLinus Torvalds Note, that multicast routers are not affected, because 21471da177e4SLinus Torvalds route cache entry is created eventually. 21481da177e4SLinus Torvalds */ 2149f97c1e0cSJoe Perches if (ipv4_is_multicast(daddr)) { 215096d36220SEric Dumazet struct in_device *in_dev = __in_dev_get_rcu(dev); 2151e58e4159SDavid Ahern int our = 0; 21525510cdf7SDavid Ahern int err = -EINVAL; 21531da177e4SLinus Torvalds 2154e58e4159SDavid Ahern if (in_dev) 2155e58e4159SDavid Ahern our = ip_check_mc_rcu(in_dev, daddr, saddr, 2156eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->protocol); 2157e58e4159SDavid Ahern 2158e58e4159SDavid Ahern /* check l3 master if no match yet */ 2159e58e4159SDavid Ahern if ((!in_dev || !our) && netif_is_l3_slave(dev)) { 2160e58e4159SDavid Ahern struct in_device *l3_in_dev; 2161e58e4159SDavid Ahern 2162e58e4159SDavid Ahern l3_in_dev = __in_dev_get_rcu(skb->dev); 2163e58e4159SDavid Ahern if (l3_in_dev) 2164e58e4159SDavid Ahern our = ip_check_mc_rcu(l3_in_dev, daddr, saddr, 2165e58e4159SDavid Ahern ip_hdr(skb)->protocol); 2166e58e4159SDavid Ahern } 2167e58e4159SDavid Ahern 21681da177e4SLinus Torvalds if (our 21691da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE 21709d4fb27dSJoe Perches || 21719d4fb27dSJoe Perches (!ipv4_is_local_multicast(daddr) && 2172f97c1e0cSJoe Perches IN_DEV_MFORWARD(in_dev)) 21731da177e4SLinus Torvalds #endif 21741da177e4SLinus Torvalds ) { 21755510cdf7SDavid Ahern err = ip_route_input_mc(skb, daddr, saddr, 21761da177e4SLinus Torvalds tos, dev, our); 2177e58e4159SDavid Ahern } 21785510cdf7SDavid Ahern return err; 21791da177e4SLinus Torvalds } 21805510cdf7SDavid Ahern 21815510cdf7SDavid Ahern return ip_route_input_slow(skb, daddr, saddr, tos, dev, res); 21821da177e4SLinus Torvalds } 21831da177e4SLinus Torvalds 2184ebc0ffaeSEric Dumazet /* called with rcu_read_lock() */ 2185982721f3SDavid S. Miller static struct rtable *__mkroute_output(const struct fib_result *res, 21861a00fee4SDavid Miller const struct flowi4 *fl4, int orig_oif, 2187f61759e6SJulian Anastasov struct net_device *dev_out, 21885ada5527SDavid S. Miller unsigned int flags) 21891da177e4SLinus Torvalds { 2190982721f3SDavid S. Miller struct fib_info *fi = res->fi; 2191f2bb4bedSDavid S. Miller struct fib_nh_exception *fnhe; 21925ada5527SDavid S. Miller struct in_device *in_dev; 2193982721f3SDavid S. Miller u16 type = res->type; 21945ada5527SDavid S. Miller struct rtable *rth; 2195c92b9655SJulian Anastasov bool do_cache; 21961da177e4SLinus Torvalds 2197d0daebc3SThomas Graf in_dev = __in_dev_get_rcu(dev_out); 2198d0daebc3SThomas Graf if (!in_dev) 2199d0daebc3SThomas Graf return ERR_PTR(-EINVAL); 2200d0daebc3SThomas Graf 2201d0daebc3SThomas Graf if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) 22025f02ce24SDavid Ahern if (ipv4_is_loopback(fl4->saddr) && 22035f02ce24SDavid Ahern !(dev_out->flags & IFF_LOOPBACK) && 22045f02ce24SDavid Ahern !netif_is_l3_master(dev_out)) 22055ada5527SDavid S. Miller return ERR_PTR(-EINVAL); 22061da177e4SLinus Torvalds 220768a5e3ddSDavid S. Miller if (ipv4_is_lbcast(fl4->daddr)) 2208982721f3SDavid S. Miller type = RTN_BROADCAST; 220968a5e3ddSDavid S. Miller else if (ipv4_is_multicast(fl4->daddr)) 2210982721f3SDavid S. Miller type = RTN_MULTICAST; 221168a5e3ddSDavid S. Miller else if (ipv4_is_zeronet(fl4->daddr)) 22125ada5527SDavid S. Miller return ERR_PTR(-EINVAL); 22131da177e4SLinus Torvalds 22141da177e4SLinus Torvalds if (dev_out->flags & IFF_LOOPBACK) 22151da177e4SLinus Torvalds flags |= RTCF_LOCAL; 22161da177e4SLinus Torvalds 221763617421SJulian Anastasov do_cache = true; 2218982721f3SDavid S. Miller if (type == RTN_BROADCAST) { 22191da177e4SLinus Torvalds flags |= RTCF_BROADCAST | RTCF_LOCAL; 2220982721f3SDavid S. Miller fi = NULL; 2221982721f3SDavid S. Miller } else if (type == RTN_MULTICAST) { 22221da177e4SLinus Torvalds flags |= RTCF_MULTICAST | RTCF_LOCAL; 2223813b3b5dSDavid S. Miller if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, 2224813b3b5dSDavid S. Miller fl4->flowi4_proto)) 22251da177e4SLinus Torvalds flags &= ~RTCF_LOCAL; 222663617421SJulian Anastasov else 222763617421SJulian Anastasov do_cache = false; 22281da177e4SLinus Torvalds /* If multicast route do not exist use 2229dd28d1a0SEric Dumazet * default one, but do not gateway in this case. 2230dd28d1a0SEric Dumazet * Yes, it is hack. 22311da177e4SLinus Torvalds */ 2232982721f3SDavid S. Miller if (fi && res->prefixlen < 4) 2233982721f3SDavid S. Miller fi = NULL; 2234d6d5e999SChris Friesen } else if ((type == RTN_LOCAL) && (orig_oif != 0) && 2235d6d5e999SChris Friesen (orig_oif != dev_out->ifindex)) { 2236d6d5e999SChris Friesen /* For local routes that require a particular output interface 2237d6d5e999SChris Friesen * we do not want to cache the result. Caching the result 2238d6d5e999SChris Friesen * causes incorrect behaviour when there are multiple source 2239d6d5e999SChris Friesen * addresses on the interface, the end result being that if the 2240d6d5e999SChris Friesen * intended recipient is waiting on that interface for the 2241d6d5e999SChris Friesen * packet he won't receive it because it will be delivered on 2242d6d5e999SChris Friesen * the loopback interface and the IP_PKTINFO ipi_ifindex will 2243d6d5e999SChris Friesen * be set to the loopback interface as well. 2244d6d5e999SChris Friesen */ 224594720e3aSJulian Anastasov do_cache = false; 22461da177e4SLinus Torvalds } 22471da177e4SLinus Torvalds 2248f2bb4bedSDavid S. Miller fnhe = NULL; 224963617421SJulian Anastasov do_cache &= fi != NULL; 225094720e3aSJulian Anastasov if (fi) { 2251d26b3a7cSEric Dumazet struct rtable __rcu **prth; 2252c92b9655SJulian Anastasov struct fib_nh *nh = &FIB_RES_NH(*res); 2253d26b3a7cSEric Dumazet 2254c92b9655SJulian Anastasov fnhe = find_exception(nh, fl4->daddr); 225594720e3aSJulian Anastasov if (!do_cache) 225694720e3aSJulian Anastasov goto add; 2257deed49dfSXin Long if (fnhe) { 22582ffae99dSTimo Teräs prth = &fnhe->fnhe_rth_output; 2259deed49dfSXin Long } else { 2260c92b9655SJulian Anastasov if (unlikely(fl4->flowi4_flags & 2261c92b9655SJulian Anastasov FLOWI_FLAG_KNOWN_NH && 2262c92b9655SJulian Anastasov !(nh->nh_gw && 2263c92b9655SJulian Anastasov nh->nh_scope == RT_SCOPE_LINK))) { 2264c92b9655SJulian Anastasov do_cache = false; 2265c92b9655SJulian Anastasov goto add; 2266c92b9655SJulian Anastasov } 2267903ceff7SChristoph Lameter prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); 226894720e3aSJulian Anastasov } 2269d26b3a7cSEric Dumazet rth = rcu_dereference(*prth); 22709df16efaSWei Wang if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) 2271f2bb4bedSDavid S. Miller return rth; 2272f2bb4bedSDavid S. Miller } 2273c92b9655SJulian Anastasov 2274c92b9655SJulian Anastasov add: 2275d08c4f35SDavid Ahern rth = rt_dst_alloc(dev_out, flags, type, 22765c1e6aa3SDavid S. Miller IN_DEV_CONF_GET(in_dev, NOPOLICY), 2277f2bb4bedSDavid S. Miller IN_DEV_CONF_GET(in_dev, NOXFRM), 2278c92b9655SJulian Anastasov do_cache); 22798391d07bSDimitris Michailidis if (!rth) 22805ada5527SDavid S. Miller return ERR_PTR(-ENOBUFS); 22818391d07bSDimitris Michailidis 22829438c871SDavid Ahern rth->rt_iif = orig_oif; 2283b7503e0cSDavid Ahern 22841da177e4SLinus Torvalds RT_CACHE_STAT_INC(out_slow_tot); 22851da177e4SLinus Torvalds 22861da177e4SLinus Torvalds if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 22871da177e4SLinus Torvalds if (flags & RTCF_LOCAL && 22881da177e4SLinus Torvalds !(dev_out->flags & IFF_LOOPBACK)) { 2289d8d1f30bSChangli Gao rth->dst.output = ip_mc_output; 22901da177e4SLinus Torvalds RT_CACHE_STAT_INC(out_slow_mc); 22911da177e4SLinus Torvalds } 22921da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE 2293982721f3SDavid S. Miller if (type == RTN_MULTICAST) { 22941da177e4SLinus Torvalds if (IN_DEV_MFORWARD(in_dev) && 2295813b3b5dSDavid S. Miller !ipv4_is_local_multicast(fl4->daddr)) { 2296d8d1f30bSChangli Gao rth->dst.input = ip_mr_input; 2297d8d1f30bSChangli Gao rth->dst.output = ip_mc_output; 22981da177e4SLinus Torvalds } 22991da177e4SLinus Torvalds } 23001da177e4SLinus Torvalds #endif 23011da177e4SLinus Torvalds } 23021da177e4SLinus Torvalds 2303a4c2fd7fSWei Wang rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache); 23049942895bSDavid Ahern lwtunnel_set_redirect(&rth->dst); 23051da177e4SLinus Torvalds 23065ada5527SDavid S. Miller return rth; 23071da177e4SLinus Torvalds } 23081da177e4SLinus Torvalds 23091da177e4SLinus Torvalds /* 23101da177e4SLinus Torvalds * Major route resolver routine. 23111da177e4SLinus Torvalds */ 23121da177e4SLinus Torvalds 23133abd1adeSDavid Ahern struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, 2314bf4e0a3dSNikolay Aleksandrov const struct sk_buff *skb) 23151da177e4SLinus Torvalds { 2316f61759e6SJulian Anastasov __u8 tos = RT_FL_TOS(fl4); 2317d0ea2b12SEric Dumazet struct fib_result res = { 2318d0ea2b12SEric Dumazet .type = RTN_UNSPEC, 2319d0ea2b12SEric Dumazet .fi = NULL, 2320d0ea2b12SEric Dumazet .table = NULL, 2321d0ea2b12SEric Dumazet .tclassid = 0, 2322d0ea2b12SEric Dumazet }; 23235ada5527SDavid S. Miller struct rtable *rth; 23241da177e4SLinus Torvalds 23251fb9489bSPavel Emelyanov fl4->flowi4_iif = LOOPBACK_IFINDEX; 2326813b3b5dSDavid S. Miller fl4->flowi4_tos = tos & IPTOS_RT_MASK; 2327813b3b5dSDavid S. Miller fl4->flowi4_scope = ((tos & RTO_ONLINK) ? 232844713b67SDavid S. Miller RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); 232944713b67SDavid S. Miller 2330010c2708SDavid S. Miller rcu_read_lock(); 23313abd1adeSDavid Ahern rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); 23323abd1adeSDavid Ahern rcu_read_unlock(); 23333abd1adeSDavid Ahern 23343abd1adeSDavid Ahern return rth; 23353abd1adeSDavid Ahern } 23363abd1adeSDavid Ahern EXPORT_SYMBOL_GPL(ip_route_output_key_hash); 23373abd1adeSDavid Ahern 23383abd1adeSDavid Ahern struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, 23393abd1adeSDavid Ahern struct fib_result *res, 23403abd1adeSDavid Ahern const struct sk_buff *skb) 23413abd1adeSDavid Ahern { 23423abd1adeSDavid Ahern struct net_device *dev_out = NULL; 23433abd1adeSDavid Ahern int orig_oif = fl4->flowi4_oif; 23443abd1adeSDavid Ahern unsigned int flags = 0; 23453abd1adeSDavid Ahern struct rtable *rth; 23463abd1adeSDavid Ahern int err = -ENETUNREACH; 23473abd1adeSDavid Ahern 2348813b3b5dSDavid S. Miller if (fl4->saddr) { 2349b23dd4feSDavid S. Miller rth = ERR_PTR(-EINVAL); 2350813b3b5dSDavid S. Miller if (ipv4_is_multicast(fl4->saddr) || 2351813b3b5dSDavid S. Miller ipv4_is_lbcast(fl4->saddr) || 2352813b3b5dSDavid S. Miller ipv4_is_zeronet(fl4->saddr)) 23531da177e4SLinus Torvalds goto out; 23541da177e4SLinus Torvalds 23551da177e4SLinus Torvalds /* I removed check for oif == dev_out->oif here. 23561da177e4SLinus Torvalds It was wrong for two reasons: 23571ab35276SDenis V. Lunev 1. ip_dev_find(net, saddr) can return wrong iface, if saddr 23581ab35276SDenis V. Lunev is assigned to multiple interfaces. 23591da177e4SLinus Torvalds 2. Moreover, we are allowed to send packets with saddr 23601da177e4SLinus Torvalds of another iface. --ANK 23611da177e4SLinus Torvalds */ 23621da177e4SLinus Torvalds 2363813b3b5dSDavid S. Miller if (fl4->flowi4_oif == 0 && 2364813b3b5dSDavid S. Miller (ipv4_is_multicast(fl4->daddr) || 2365813b3b5dSDavid S. Miller ipv4_is_lbcast(fl4->daddr))) { 2366a210d01aSJulian Anastasov /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2367813b3b5dSDavid S. Miller dev_out = __ip_dev_find(net, fl4->saddr, false); 236851456b29SIan Morris if (!dev_out) 2369a210d01aSJulian Anastasov goto out; 2370a210d01aSJulian Anastasov 23711da177e4SLinus Torvalds /* Special hack: user can direct multicasts 23721da177e4SLinus Torvalds and limited broadcast via necessary interface 23731da177e4SLinus Torvalds without fiddling with IP_MULTICAST_IF or IP_PKTINFO. 23741da177e4SLinus Torvalds This hack is not just for fun, it allows 23751da177e4SLinus Torvalds vic,vat and friends to work. 23761da177e4SLinus Torvalds They bind socket to loopback, set ttl to zero 23771da177e4SLinus Torvalds and expect that it will work. 23781da177e4SLinus Torvalds From the viewpoint of routing cache they are broken, 23791da177e4SLinus Torvalds because we are not allowed to build multicast path 23801da177e4SLinus Torvalds with loopback source addr (look, routing cache 23811da177e4SLinus Torvalds cannot know, that ttl is zero, so that packet 23821da177e4SLinus Torvalds will not leave this host and route is valid). 23831da177e4SLinus Torvalds Luckily, this hack is good workaround. 23841da177e4SLinus Torvalds */ 23851da177e4SLinus Torvalds 2386813b3b5dSDavid S. Miller fl4->flowi4_oif = dev_out->ifindex; 23871da177e4SLinus Torvalds goto make_route; 23881da177e4SLinus Torvalds } 2389a210d01aSJulian Anastasov 2390813b3b5dSDavid S. Miller if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { 2391a210d01aSJulian Anastasov /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2392813b3b5dSDavid S. Miller if (!__ip_dev_find(net, fl4->saddr, false)) 2393a210d01aSJulian Anastasov goto out; 23941da177e4SLinus Torvalds } 2395a210d01aSJulian Anastasov } 23961da177e4SLinus Torvalds 23971da177e4SLinus Torvalds 2398813b3b5dSDavid S. Miller if (fl4->flowi4_oif) { 2399813b3b5dSDavid S. Miller dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); 2400b23dd4feSDavid S. Miller rth = ERR_PTR(-ENODEV); 240151456b29SIan Morris if (!dev_out) 24021da177e4SLinus Torvalds goto out; 2403e5ed6399SHerbert Xu 2404e5ed6399SHerbert Xu /* RACE: Check return value of inet_select_addr instead. */ 2405fc75fc83SEric Dumazet if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { 2406b23dd4feSDavid S. Miller rth = ERR_PTR(-ENETUNREACH); 2407fc75fc83SEric Dumazet goto out; 2408fc75fc83SEric Dumazet } 2409813b3b5dSDavid S. Miller if (ipv4_is_local_multicast(fl4->daddr) || 24106a211654SAndrew Lunn ipv4_is_lbcast(fl4->daddr) || 24116a211654SAndrew Lunn fl4->flowi4_proto == IPPROTO_IGMP) { 2412813b3b5dSDavid S. Miller if (!fl4->saddr) 2413813b3b5dSDavid S. Miller fl4->saddr = inet_select_addr(dev_out, 0, 24141da177e4SLinus Torvalds RT_SCOPE_LINK); 24151da177e4SLinus Torvalds goto make_route; 24161da177e4SLinus Torvalds } 24170a7e2260SJiri Benc if (!fl4->saddr) { 2418813b3b5dSDavid S. Miller if (ipv4_is_multicast(fl4->daddr)) 2419813b3b5dSDavid S. Miller fl4->saddr = inet_select_addr(dev_out, 0, 2420813b3b5dSDavid S. Miller fl4->flowi4_scope); 2421813b3b5dSDavid S. Miller else if (!fl4->daddr) 2422813b3b5dSDavid S. Miller fl4->saddr = inet_select_addr(dev_out, 0, 24231da177e4SLinus Torvalds RT_SCOPE_HOST); 24241da177e4SLinus Torvalds } 2425613d09b3SDavid Ahern } 24261da177e4SLinus Torvalds 2427813b3b5dSDavid S. Miller if (!fl4->daddr) { 2428813b3b5dSDavid S. Miller fl4->daddr = fl4->saddr; 2429813b3b5dSDavid S. Miller if (!fl4->daddr) 2430813b3b5dSDavid S. Miller fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); 2431b40afd0eSDenis V. Lunev dev_out = net->loopback_dev; 24321fb9489bSPavel Emelyanov fl4->flowi4_oif = LOOPBACK_IFINDEX; 24333abd1adeSDavid Ahern res->type = RTN_LOCAL; 24341da177e4SLinus Torvalds flags |= RTCF_LOCAL; 24351da177e4SLinus Torvalds goto make_route; 24361da177e4SLinus Torvalds } 24371da177e4SLinus Torvalds 24383abd1adeSDavid Ahern err = fib_lookup(net, fl4, res, 0); 24390315e382SNikola Forró if (err) { 24403abd1adeSDavid Ahern res->fi = NULL; 24413abd1adeSDavid Ahern res->table = NULL; 24426104e112SDavid Ahern if (fl4->flowi4_oif && 2443e58e4159SDavid Ahern (ipv4_is_multicast(fl4->daddr) || 2444e58e4159SDavid Ahern !netif_index_is_l3_master(net, fl4->flowi4_oif))) { 24451da177e4SLinus Torvalds /* Apparently, routing tables are wrong. Assume, 24461da177e4SLinus Torvalds that the destination is on link. 24471da177e4SLinus Torvalds 24481da177e4SLinus Torvalds WHY? DW. 24491da177e4SLinus Torvalds Because we are allowed to send to iface 24501da177e4SLinus Torvalds even if it has NO routes and NO assigned 24511da177e4SLinus Torvalds addresses. When oif is specified, routing 24521da177e4SLinus Torvalds tables are looked up with only one purpose: 24531da177e4SLinus Torvalds to catch if destination is gatewayed, rather than 24541da177e4SLinus Torvalds direct. Moreover, if MSG_DONTROUTE is set, 24551da177e4SLinus Torvalds we send packet, ignoring both routing tables 24561da177e4SLinus Torvalds and ifaddr state. --ANK 24571da177e4SLinus Torvalds 24581da177e4SLinus Torvalds 24591da177e4SLinus Torvalds We could make it even if oif is unknown, 24601da177e4SLinus Torvalds likely IPv6, but we do not. 24611da177e4SLinus Torvalds */ 24621da177e4SLinus Torvalds 2463813b3b5dSDavid S. Miller if (fl4->saddr == 0) 2464813b3b5dSDavid S. Miller fl4->saddr = inet_select_addr(dev_out, 0, 24651da177e4SLinus Torvalds RT_SCOPE_LINK); 24663abd1adeSDavid Ahern res->type = RTN_UNICAST; 24671da177e4SLinus Torvalds goto make_route; 24681da177e4SLinus Torvalds } 24690315e382SNikola Forró rth = ERR_PTR(err); 24701da177e4SLinus Torvalds goto out; 24711da177e4SLinus Torvalds } 24721da177e4SLinus Torvalds 24733abd1adeSDavid Ahern if (res->type == RTN_LOCAL) { 2474813b3b5dSDavid S. Miller if (!fl4->saddr) { 24753abd1adeSDavid Ahern if (res->fi->fib_prefsrc) 24763abd1adeSDavid Ahern fl4->saddr = res->fi->fib_prefsrc; 24779fc3bbb4SJoel Sing else 2478813b3b5dSDavid S. Miller fl4->saddr = fl4->daddr; 24799fc3bbb4SJoel Sing } 24805f02ce24SDavid Ahern 24815f02ce24SDavid Ahern /* L3 master device is the loopback for that domain */ 24823abd1adeSDavid Ahern dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? : 2483b7c8487cSRobert Shearman net->loopback_dev; 2484839da4d9SDavid Ahern 2485839da4d9SDavid Ahern /* make sure orig_oif points to fib result device even 2486839da4d9SDavid Ahern * though packet rx/tx happens over loopback or l3mdev 2487839da4d9SDavid Ahern */ 2488839da4d9SDavid Ahern orig_oif = FIB_RES_OIF(*res); 2489839da4d9SDavid Ahern 2490813b3b5dSDavid S. Miller fl4->flowi4_oif = dev_out->ifindex; 24911da177e4SLinus Torvalds flags |= RTCF_LOCAL; 24921da177e4SLinus Torvalds goto make_route; 24931da177e4SLinus Torvalds } 24941da177e4SLinus Torvalds 24953abd1adeSDavid Ahern fib_select_path(net, res, fl4, skb); 24961da177e4SLinus Torvalds 24973abd1adeSDavid Ahern dev_out = FIB_RES_DEV(*res); 2498813b3b5dSDavid S. Miller fl4->flowi4_oif = dev_out->ifindex; 24991da177e4SLinus Torvalds 25001da177e4SLinus Torvalds 25011da177e4SLinus Torvalds make_route: 25023abd1adeSDavid Ahern rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); 25031da177e4SLinus Torvalds 2504010c2708SDavid S. Miller out: 2505b23dd4feSDavid S. Miller return rth; 25061da177e4SLinus Torvalds } 2507d8c97a94SArnaldo Carvalho de Melo 2508ae2688d5SJianzhao Wang static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) 2509ae2688d5SJianzhao Wang { 2510ae2688d5SJianzhao Wang return NULL; 2511ae2688d5SJianzhao Wang } 2512ae2688d5SJianzhao Wang 2513ebb762f2SSteffen Klassert static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) 2514ec831ea7SRoland Dreier { 2515618f9bc7SSteffen Klassert unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 2516618f9bc7SSteffen Klassert 2517618f9bc7SSteffen Klassert return mtu ? : dst->dev->mtu; 2518ec831ea7SRoland Dreier } 2519ec831ea7SRoland Dreier 25206700c270SDavid S. Miller static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, 25216700c270SDavid S. Miller struct sk_buff *skb, u32 mtu) 252214e50e57SDavid S. Miller { 252314e50e57SDavid S. Miller } 252414e50e57SDavid S. Miller 25256700c270SDavid S. Miller static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, 25266700c270SDavid S. Miller struct sk_buff *skb) 2527b587ee3bSDavid S. Miller { 2528b587ee3bSDavid S. Miller } 2529b587ee3bSDavid S. Miller 25300972ddb2SHeld Bernhard static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, 25310972ddb2SHeld Bernhard unsigned long old) 25320972ddb2SHeld Bernhard { 25330972ddb2SHeld Bernhard return NULL; 25340972ddb2SHeld Bernhard } 25350972ddb2SHeld Bernhard 253614e50e57SDavid S. Miller static struct dst_ops ipv4_dst_blackhole_ops = { 253714e50e57SDavid S. Miller .family = AF_INET, 2538ae2688d5SJianzhao Wang .check = ipv4_blackhole_dst_check, 2539ebb762f2SSteffen Klassert .mtu = ipv4_blackhole_mtu, 2540214f45c9SEric Dumazet .default_advmss = ipv4_default_advmss, 254114e50e57SDavid S. Miller .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2542b587ee3bSDavid S. Miller .redirect = ipv4_rt_blackhole_redirect, 25430972ddb2SHeld Bernhard .cow_metrics = ipv4_rt_blackhole_cow_metrics, 2544d3aaeb38SDavid S. Miller .neigh_lookup = ipv4_neigh_lookup, 254514e50e57SDavid S. Miller }; 254614e50e57SDavid S. Miller 25472774c131SDavid S. Miller struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) 254814e50e57SDavid S. Miller { 25492774c131SDavid S. Miller struct rtable *ort = (struct rtable *) dst_orig; 2550f5b0a874SDavid S. Miller struct rtable *rt; 255114e50e57SDavid S. Miller 25526c0e7284SSteffen Klassert rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); 255314e50e57SDavid S. Miller if (rt) { 2554d8d1f30bSChangli Gao struct dst_entry *new = &rt->dst; 255514e50e57SDavid S. Miller 255614e50e57SDavid S. Miller new->__use = 1; 2557352e512cSHerbert Xu new->input = dst_discard; 2558ede2059dSEric W. Biederman new->output = dst_discard_out; 255914e50e57SDavid S. Miller 25601dbe3252SWei Wang new->dev = net->loopback_dev; 256114e50e57SDavid S. Miller if (new->dev) 256214e50e57SDavid S. Miller dev_hold(new->dev); 256314e50e57SDavid S. Miller 25649917e1e8SDavid S. Miller rt->rt_is_input = ort->rt_is_input; 25655e2b61f7SDavid S. Miller rt->rt_iif = ort->rt_iif; 25665943634fSDavid S. Miller rt->rt_pmtu = ort->rt_pmtu; 2567d52e5a7eSSabrina Dubroca rt->rt_mtu_locked = ort->rt_mtu_locked; 256814e50e57SDavid S. Miller 2569ca4c3fc2Sfan.du rt->rt_genid = rt_genid_ipv4(net); 257014e50e57SDavid S. Miller rt->rt_flags = ort->rt_flags; 257114e50e57SDavid S. Miller rt->rt_type = ort->rt_type; 257214e50e57SDavid S. Miller rt->rt_gateway = ort->rt_gateway; 2573155e8336SJulian Anastasov rt->rt_uses_gateway = ort->rt_uses_gateway; 257414e50e57SDavid S. Miller 2575caacf05eSDavid S. Miller INIT_LIST_HEAD(&rt->rt_uncached); 257614e50e57SDavid S. Miller } 257714e50e57SDavid S. Miller 25782774c131SDavid S. Miller dst_release(dst_orig); 25792774c131SDavid S. Miller 25802774c131SDavid S. Miller return rt ? &rt->dst : ERR_PTR(-ENOMEM); 258114e50e57SDavid S. Miller } 258214e50e57SDavid S. Miller 25839d6ec938SDavid S. Miller struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, 25846f9c9615SEric Dumazet const struct sock *sk) 25851da177e4SLinus Torvalds { 25869d6ec938SDavid S. Miller struct rtable *rt = __ip_route_output_key(net, flp4); 25871da177e4SLinus Torvalds 2588b23dd4feSDavid S. Miller if (IS_ERR(rt)) 2589b23dd4feSDavid S. Miller return rt; 25901da177e4SLinus Torvalds 259156157872SDavid S. Miller if (flp4->flowi4_proto) 2592f92ee619SSteffen Klassert rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, 25939d6ec938SDavid S. Miller flowi4_to_flowi(flp4), 25949d6ec938SDavid S. Miller sk, 0); 25951da177e4SLinus Torvalds 2596b23dd4feSDavid S. Miller return rt; 25971da177e4SLinus Torvalds } 2598d8c97a94SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(ip_route_output_flow); 2599d8c97a94SArnaldo Carvalho de Melo 26003765d35eSDavid Ahern /* called with rcu_read_lock held */ 2601404eb77eSRoopa Prabhu static int rt_fill_info(struct net *net, __be32 dst, __be32 src, 2602404eb77eSRoopa Prabhu struct rtable *rt, u32 table_id, struct flowi4 *fl4, 2603404eb77eSRoopa Prabhu struct sk_buff *skb, u32 portid, u32 seq) 26041da177e4SLinus Torvalds { 26051da177e4SLinus Torvalds struct rtmsg *r; 26061da177e4SLinus Torvalds struct nlmsghdr *nlh; 26072bc8ca40SSteffen Klassert unsigned long expires = 0; 2608f185071dSDavid S. Miller u32 error; 2609521f5490SJulian Anastasov u32 metrics[RTAX_MAX]; 2610be403ea1SThomas Graf 2611d3166e0cSDavid Ahern nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0); 261251456b29SIan Morris if (!nlh) 261326932566SPatrick McHardy return -EMSGSIZE; 2614be403ea1SThomas Graf 2615be403ea1SThomas Graf r = nlmsg_data(nlh); 26161da177e4SLinus Torvalds r->rtm_family = AF_INET; 26171da177e4SLinus Torvalds r->rtm_dst_len = 32; 26181da177e4SLinus Torvalds r->rtm_src_len = 0; 2619d6c0a4f6SDavid Miller r->rtm_tos = fl4->flowi4_tos; 26208a430ed5SDavid Ahern r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; 2621c36ba660SDavid Ahern if (nla_put_u32(skb, RTA_TABLE, table_id)) 2622f3756b79SDavid S. Miller goto nla_put_failure; 26231da177e4SLinus Torvalds r->rtm_type = rt->rt_type; 26241da177e4SLinus Torvalds r->rtm_scope = RT_SCOPE_UNIVERSE; 26251da177e4SLinus Torvalds r->rtm_protocol = RTPROT_UNSPEC; 26261da177e4SLinus Torvalds r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; 26271da177e4SLinus Torvalds if (rt->rt_flags & RTCF_NOTIFY) 26281da177e4SLinus Torvalds r->rtm_flags |= RTM_F_NOTIFY; 2629df4d9254SHannes Frederic Sowa if (IPCB(skb)->flags & IPSKB_DOREDIRECT) 2630df4d9254SHannes Frederic Sowa r->rtm_flags |= RTCF_DOREDIRECT; 2631be403ea1SThomas Graf 2632930345eaSJiri Benc if (nla_put_in_addr(skb, RTA_DST, dst)) 2633f3756b79SDavid S. Miller goto nla_put_failure; 26341a00fee4SDavid Miller if (src) { 26351da177e4SLinus Torvalds r->rtm_src_len = 32; 2636930345eaSJiri Benc if (nla_put_in_addr(skb, RTA_SRC, src)) 2637f3756b79SDavid S. Miller goto nla_put_failure; 26381da177e4SLinus Torvalds } 2639f3756b79SDavid S. Miller if (rt->dst.dev && 2640f3756b79SDavid S. Miller nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 2641f3756b79SDavid S. Miller goto nla_put_failure; 2642c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 2643f3756b79SDavid S. Miller if (rt->dst.tclassid && 2644f3756b79SDavid S. Miller nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) 2645f3756b79SDavid S. Miller goto nla_put_failure; 26461da177e4SLinus Torvalds #endif 264741347dcdSDavid S. Miller if (!rt_is_input_route(rt) && 2648d6c0a4f6SDavid Miller fl4->saddr != src) { 2649930345eaSJiri Benc if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) 2650f3756b79SDavid S. Miller goto nla_put_failure; 2651f3756b79SDavid S. Miller } 2652155e8336SJulian Anastasov if (rt->rt_uses_gateway && 2653930345eaSJiri Benc nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway)) 2654f3756b79SDavid S. Miller goto nla_put_failure; 2655be403ea1SThomas Graf 2656ee9a8f7aSSteffen Klassert expires = rt->dst.expires; 2657ee9a8f7aSSteffen Klassert if (expires) { 2658ee9a8f7aSSteffen Klassert unsigned long now = jiffies; 2659ee9a8f7aSSteffen Klassert 2660ee9a8f7aSSteffen Klassert if (time_before(now, expires)) 2661ee9a8f7aSSteffen Klassert expires -= now; 2662ee9a8f7aSSteffen Klassert else 2663ee9a8f7aSSteffen Klassert expires = 0; 2664ee9a8f7aSSteffen Klassert } 2665ee9a8f7aSSteffen Klassert 2666521f5490SJulian Anastasov memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); 2667ee9a8f7aSSteffen Klassert if (rt->rt_pmtu && expires) 2668521f5490SJulian Anastasov metrics[RTAX_MTU - 1] = rt->rt_pmtu; 2669d52e5a7eSSabrina Dubroca if (rt->rt_mtu_locked && expires) 2670d52e5a7eSSabrina Dubroca metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); 2671521f5490SJulian Anastasov if (rtnetlink_put_metrics(skb, metrics) < 0) 2672be403ea1SThomas Graf goto nla_put_failure; 2673be403ea1SThomas Graf 2674b4869889SDavid Miller if (fl4->flowi4_mark && 267568aaed54Sstephen hemminger nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) 2676f3756b79SDavid S. Miller goto nla_put_failure; 2677963bfeeeSEric Dumazet 2678622ec2c9SLorenzo Colitti if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && 2679622ec2c9SLorenzo Colitti nla_put_u32(skb, RTA_UID, 2680622ec2c9SLorenzo Colitti from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) 2681622ec2c9SLorenzo Colitti goto nla_put_failure; 2682622ec2c9SLorenzo Colitti 2683d8d1f30bSChangli Gao error = rt->dst.error; 2684be403ea1SThomas Graf 2685c7537967SDavid S. Miller if (rt_is_input_route(rt)) { 26868caaf7b6SNicolas Dichtel #ifdef CONFIG_IP_MROUTE 26878caaf7b6SNicolas Dichtel if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && 26888caaf7b6SNicolas Dichtel IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { 26898caaf7b6SNicolas Dichtel int err = ipmr_get_route(net, skb, 26908caaf7b6SNicolas Dichtel fl4->saddr, fl4->daddr, 26919f09eaeaSDavid Ahern r, portid); 26922cf75070SNikolay Aleksandrov 26938caaf7b6SNicolas Dichtel if (err <= 0) { 26948caaf7b6SNicolas Dichtel if (err == 0) 26958caaf7b6SNicolas Dichtel return 0; 26968caaf7b6SNicolas Dichtel goto nla_put_failure; 26978caaf7b6SNicolas Dichtel } 26988caaf7b6SNicolas Dichtel } else 26998caaf7b6SNicolas Dichtel #endif 2700404eb77eSRoopa Prabhu if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) 2701f3756b79SDavid S. Miller goto nla_put_failure; 27021da177e4SLinus Torvalds } 27031da177e4SLinus Torvalds 2704f185071dSDavid S. Miller if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) 2705e3703b3dSThomas Graf goto nla_put_failure; 27061da177e4SLinus Torvalds 2707053c095aSJohannes Berg nlmsg_end(skb, nlh); 2708053c095aSJohannes Berg return 0; 2709be403ea1SThomas Graf 2710be403ea1SThomas Graf nla_put_failure: 271126932566SPatrick McHardy nlmsg_cancel(skb, nlh); 271226932566SPatrick McHardy return -EMSGSIZE; 27131da177e4SLinus Torvalds } 27141da177e4SLinus Torvalds 2715404eb77eSRoopa Prabhu static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, 2716404eb77eSRoopa Prabhu u8 ip_proto, __be16 sport, 2717404eb77eSRoopa Prabhu __be16 dport) 2718404eb77eSRoopa Prabhu { 2719404eb77eSRoopa Prabhu struct sk_buff *skb; 2720404eb77eSRoopa Prabhu struct iphdr *iph; 2721404eb77eSRoopa Prabhu 2722404eb77eSRoopa Prabhu skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2723404eb77eSRoopa Prabhu if (!skb) 2724404eb77eSRoopa Prabhu return NULL; 2725404eb77eSRoopa Prabhu 2726404eb77eSRoopa Prabhu /* Reserve room for dummy headers, this skb can pass 2727404eb77eSRoopa Prabhu * through good chunk of routing engine. 2728404eb77eSRoopa Prabhu */ 2729404eb77eSRoopa Prabhu skb_reset_mac_header(skb); 2730404eb77eSRoopa Prabhu skb_reset_network_header(skb); 2731404eb77eSRoopa Prabhu skb->protocol = htons(ETH_P_IP); 2732404eb77eSRoopa Prabhu iph = skb_put(skb, sizeof(struct iphdr)); 2733404eb77eSRoopa Prabhu iph->protocol = ip_proto; 2734404eb77eSRoopa Prabhu iph->saddr = src; 2735404eb77eSRoopa Prabhu iph->daddr = dst; 2736404eb77eSRoopa Prabhu iph->version = 0x4; 2737404eb77eSRoopa Prabhu iph->frag_off = 0; 2738404eb77eSRoopa Prabhu iph->ihl = 0x5; 2739404eb77eSRoopa Prabhu skb_set_transport_header(skb, skb->len); 2740404eb77eSRoopa Prabhu 2741404eb77eSRoopa Prabhu switch (iph->protocol) { 2742404eb77eSRoopa Prabhu case IPPROTO_UDP: { 2743404eb77eSRoopa Prabhu struct udphdr *udph; 2744404eb77eSRoopa Prabhu 2745404eb77eSRoopa Prabhu udph = skb_put_zero(skb, sizeof(struct udphdr)); 2746404eb77eSRoopa Prabhu udph->source = sport; 2747404eb77eSRoopa Prabhu udph->dest = dport; 2748404eb77eSRoopa Prabhu udph->len = sizeof(struct udphdr); 2749404eb77eSRoopa Prabhu udph->check = 0; 2750404eb77eSRoopa Prabhu break; 2751404eb77eSRoopa Prabhu } 2752404eb77eSRoopa Prabhu case IPPROTO_TCP: { 2753404eb77eSRoopa Prabhu struct tcphdr *tcph; 2754404eb77eSRoopa Prabhu 2755404eb77eSRoopa Prabhu tcph = skb_put_zero(skb, sizeof(struct tcphdr)); 2756404eb77eSRoopa Prabhu tcph->source = sport; 2757404eb77eSRoopa Prabhu tcph->dest = dport; 2758404eb77eSRoopa Prabhu tcph->doff = sizeof(struct tcphdr) / 4; 2759404eb77eSRoopa Prabhu tcph->rst = 1; 2760404eb77eSRoopa Prabhu tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), 2761404eb77eSRoopa Prabhu src, dst, 0); 2762404eb77eSRoopa Prabhu break; 2763404eb77eSRoopa Prabhu } 2764404eb77eSRoopa Prabhu case IPPROTO_ICMP: { 2765404eb77eSRoopa Prabhu struct icmphdr *icmph; 2766404eb77eSRoopa Prabhu 2767404eb77eSRoopa Prabhu icmph = skb_put_zero(skb, sizeof(struct icmphdr)); 2768404eb77eSRoopa Prabhu icmph->type = ICMP_ECHO; 2769404eb77eSRoopa Prabhu icmph->code = 0; 2770404eb77eSRoopa Prabhu } 2771404eb77eSRoopa Prabhu } 2772404eb77eSRoopa Prabhu 2773404eb77eSRoopa Prabhu return skb; 2774404eb77eSRoopa Prabhu } 2775404eb77eSRoopa Prabhu 2776c21ef3e3SDavid Ahern static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2777c21ef3e3SDavid Ahern struct netlink_ext_ack *extack) 27781da177e4SLinus Torvalds { 27793b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(in_skb->sk); 2780d889ce3bSThomas Graf struct nlattr *tb[RTA_MAX+1]; 2781404eb77eSRoopa Prabhu u32 table_id = RT_TABLE_MAIN; 2782404eb77eSRoopa Prabhu __be16 sport = 0, dport = 0; 27833765d35eSDavid Ahern struct fib_result res = {}; 2784404eb77eSRoopa Prabhu u8 ip_proto = IPPROTO_UDP; 27851da177e4SLinus Torvalds struct rtable *rt = NULL; 2786404eb77eSRoopa Prabhu struct sk_buff *skb; 2787404eb77eSRoopa Prabhu struct rtmsg *rtm; 2788d6c0a4f6SDavid Miller struct flowi4 fl4; 27899e12bb22SAl Viro __be32 dst = 0; 27909e12bb22SAl Viro __be32 src = 0; 2791404eb77eSRoopa Prabhu kuid_t uid; 27929e12bb22SAl Viro u32 iif; 2793d889ce3bSThomas Graf int err; 2794963bfeeeSEric Dumazet int mark; 27951da177e4SLinus Torvalds 2796fceb6435SJohannes Berg err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, 2797c21ef3e3SDavid Ahern extack); 2798d889ce3bSThomas Graf if (err < 0) 2799404eb77eSRoopa Prabhu return err; 2800d889ce3bSThomas Graf 2801d889ce3bSThomas Graf rtm = nlmsg_data(nlh); 280267b61f6cSJiri Benc src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; 280367b61f6cSJiri Benc dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 2804d889ce3bSThomas Graf iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; 2805963bfeeeSEric Dumazet mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; 2806622ec2c9SLorenzo Colitti if (tb[RTA_UID]) 2807622ec2c9SLorenzo Colitti uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); 2808622ec2c9SLorenzo Colitti else 2809622ec2c9SLorenzo Colitti uid = (iif ? INVALID_UID : current_uid()); 28101da177e4SLinus Torvalds 2811404eb77eSRoopa Prabhu if (tb[RTA_IP_PROTO]) { 2812404eb77eSRoopa Prabhu err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], 2813404eb77eSRoopa Prabhu &ip_proto, extack); 2814404eb77eSRoopa Prabhu if (err) 2815404eb77eSRoopa Prabhu return err; 2816404eb77eSRoopa Prabhu } 2817bbadb9a2SFlorian Larysch 2818404eb77eSRoopa Prabhu if (tb[RTA_SPORT]) 2819404eb77eSRoopa Prabhu sport = nla_get_be16(tb[RTA_SPORT]); 2820404eb77eSRoopa Prabhu 2821404eb77eSRoopa Prabhu if (tb[RTA_DPORT]) 2822404eb77eSRoopa Prabhu dport = nla_get_be16(tb[RTA_DPORT]); 2823404eb77eSRoopa Prabhu 2824404eb77eSRoopa Prabhu skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport); 2825404eb77eSRoopa Prabhu if (!skb) 2826404eb77eSRoopa Prabhu return -ENOBUFS; 2827bbadb9a2SFlorian Larysch 2828d6c0a4f6SDavid Miller memset(&fl4, 0, sizeof(fl4)); 2829d6c0a4f6SDavid Miller fl4.daddr = dst; 2830d6c0a4f6SDavid Miller fl4.saddr = src; 2831d6c0a4f6SDavid Miller fl4.flowi4_tos = rtm->rtm_tos; 2832d6c0a4f6SDavid Miller fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; 2833d6c0a4f6SDavid Miller fl4.flowi4_mark = mark; 2834622ec2c9SLorenzo Colitti fl4.flowi4_uid = uid; 2835404eb77eSRoopa Prabhu if (sport) 2836404eb77eSRoopa Prabhu fl4.fl4_sport = sport; 2837404eb77eSRoopa Prabhu if (dport) 2838404eb77eSRoopa Prabhu fl4.fl4_dport = dport; 2839404eb77eSRoopa Prabhu fl4.flowi4_proto = ip_proto; 2840d6c0a4f6SDavid Miller 28413765d35eSDavid Ahern rcu_read_lock(); 28423765d35eSDavid Ahern 28431da177e4SLinus Torvalds if (iif) { 2844d889ce3bSThomas Graf struct net_device *dev; 2845d889ce3bSThomas Graf 28463765d35eSDavid Ahern dev = dev_get_by_index_rcu(net, iif); 284751456b29SIan Morris if (!dev) { 28481da177e4SLinus Torvalds err = -ENODEV; 2849404eb77eSRoopa Prabhu goto errout_rcu; 2850d889ce3bSThomas Graf } 2851d889ce3bSThomas Graf 2852404eb77eSRoopa Prabhu fl4.flowi4_iif = iif; /* for rt_fill_info */ 28531da177e4SLinus Torvalds skb->dev = dev; 2854963bfeeeSEric Dumazet skb->mark = mark; 28553765d35eSDavid Ahern err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos, 28563765d35eSDavid Ahern dev, &res); 2857d889ce3bSThomas Graf 2858511c3f92SEric Dumazet rt = skb_rtable(skb); 2859d8d1f30bSChangli Gao if (err == 0 && rt->dst.error) 2860d8d1f30bSChangli Gao err = -rt->dst.error; 28611da177e4SLinus Torvalds } else { 28626503a304SLorenzo Colitti fl4.flowi4_iif = LOOPBACK_IFINDEX; 28633765d35eSDavid Ahern rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); 2864b23dd4feSDavid S. Miller err = 0; 2865b23dd4feSDavid S. Miller if (IS_ERR(rt)) 2866b23dd4feSDavid S. Miller err = PTR_ERR(rt); 28672c87d63aSFlorian Westphal else 28682c87d63aSFlorian Westphal skb_dst_set(skb, &rt->dst); 28691da177e4SLinus Torvalds } 2870d889ce3bSThomas Graf 28711da177e4SLinus Torvalds if (err) 2872404eb77eSRoopa Prabhu goto errout_rcu; 28731da177e4SLinus Torvalds 28741da177e4SLinus Torvalds if (rtm->rtm_flags & RTM_F_NOTIFY) 28751da177e4SLinus Torvalds rt->rt_flags |= RTCF_NOTIFY; 28761da177e4SLinus Torvalds 2877c36ba660SDavid Ahern if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) 287868e813aaSDavid Ahern table_id = res.table ? res.table->tb_id : 0; 2879c36ba660SDavid Ahern 2880404eb77eSRoopa Prabhu /* reset skb for netlink reply msg */ 2881404eb77eSRoopa Prabhu skb_trim(skb, 0); 2882404eb77eSRoopa Prabhu skb_reset_network_header(skb); 2883404eb77eSRoopa Prabhu skb_reset_transport_header(skb); 2884404eb77eSRoopa Prabhu skb_reset_mac_header(skb); 2885404eb77eSRoopa Prabhu 2886bc3aae2bSRoopa Prabhu if (rtm->rtm_flags & RTM_F_FIB_MATCH) { 2887bc3aae2bSRoopa Prabhu if (!res.fi) { 2888bc3aae2bSRoopa Prabhu err = fib_props[res.type].error; 2889bc3aae2bSRoopa Prabhu if (!err) 2890bc3aae2bSRoopa Prabhu err = -EHOSTUNREACH; 2891404eb77eSRoopa Prabhu goto errout_rcu; 2892bc3aae2bSRoopa Prabhu } 2893b6179813SRoopa Prabhu err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, 2894b6179813SRoopa Prabhu nlh->nlmsg_seq, RTM_NEWROUTE, table_id, 2895b6179813SRoopa Prabhu rt->rt_type, res.prefix, res.prefixlen, 2896b6179813SRoopa Prabhu fl4.flowi4_tos, res.fi, 0); 2897bc3aae2bSRoopa Prabhu } else { 2898404eb77eSRoopa Prabhu err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb, 2899ba52d61eSRoopa Prabhu NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); 2900bc3aae2bSRoopa Prabhu } 29017b46a644SDavid S. Miller if (err < 0) 2902404eb77eSRoopa Prabhu goto errout_rcu; 29031da177e4SLinus Torvalds 29043765d35eSDavid Ahern rcu_read_unlock(); 29053765d35eSDavid Ahern 290615e47304SEric W. Biederman err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 29071da177e4SLinus Torvalds 2908d889ce3bSThomas Graf errout_free: 2909404eb77eSRoopa Prabhu return err; 2910404eb77eSRoopa Prabhu errout_rcu: 29113765d35eSDavid Ahern rcu_read_unlock(); 29121da177e4SLinus Torvalds kfree_skb(skb); 2913404eb77eSRoopa Prabhu goto errout_free; 29141da177e4SLinus Torvalds } 29151da177e4SLinus Torvalds 29161da177e4SLinus Torvalds void ip_rt_multicast_event(struct in_device *in_dev) 29171da177e4SLinus Torvalds { 29184ccfe6d4SNicolas Dichtel rt_cache_flush(dev_net(in_dev->dev)); 29191da177e4SLinus Torvalds } 29201da177e4SLinus Torvalds 29211da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL 2922082c7ca4SGao feng static int ip_rt_gc_interval __read_mostly = 60 * HZ; 2923082c7ca4SGao feng static int ip_rt_gc_min_interval __read_mostly = HZ / 2; 2924082c7ca4SGao feng static int ip_rt_gc_elasticity __read_mostly = 8; 2925773daa3cSArnd Bergmann static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; 2926082c7ca4SGao feng 2927fe2c6338SJoe Perches static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, 29288d65af78SAlexey Dobriyan void __user *buffer, 29291da177e4SLinus Torvalds size_t *lenp, loff_t *ppos) 29301da177e4SLinus Torvalds { 29315aad1de5STimo Teräs struct net *net = (struct net *)__ctl->extra1; 29325aad1de5STimo Teräs 29331da177e4SLinus Torvalds if (write) { 29345aad1de5STimo Teräs rt_cache_flush(net); 29355aad1de5STimo Teräs fnhe_genid_bump(net); 29361da177e4SLinus Torvalds return 0; 29371da177e4SLinus Torvalds } 29381da177e4SLinus Torvalds 29391da177e4SLinus Torvalds return -EINVAL; 29401da177e4SLinus Torvalds } 29411da177e4SLinus Torvalds 2942fe2c6338SJoe Perches static struct ctl_table ipv4_route_table[] = { 29431da177e4SLinus Torvalds { 29441da177e4SLinus Torvalds .procname = "gc_thresh", 29451da177e4SLinus Torvalds .data = &ipv4_dst_ops.gc_thresh, 29461da177e4SLinus Torvalds .maxlen = sizeof(int), 29471da177e4SLinus Torvalds .mode = 0644, 29486d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 29491da177e4SLinus Torvalds }, 29501da177e4SLinus Torvalds { 29511da177e4SLinus Torvalds .procname = "max_size", 29521da177e4SLinus Torvalds .data = &ip_rt_max_size, 29531da177e4SLinus Torvalds .maxlen = sizeof(int), 29541da177e4SLinus Torvalds .mode = 0644, 29556d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 29561da177e4SLinus Torvalds }, 29571da177e4SLinus Torvalds { 29581da177e4SLinus Torvalds /* Deprecated. Use gc_min_interval_ms */ 29591da177e4SLinus Torvalds 29601da177e4SLinus Torvalds .procname = "gc_min_interval", 29611da177e4SLinus Torvalds .data = &ip_rt_gc_min_interval, 29621da177e4SLinus Torvalds .maxlen = sizeof(int), 29631da177e4SLinus Torvalds .mode = 0644, 29646d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_jiffies, 29651da177e4SLinus Torvalds }, 29661da177e4SLinus Torvalds { 29671da177e4SLinus Torvalds .procname = "gc_min_interval_ms", 29681da177e4SLinus Torvalds .data = &ip_rt_gc_min_interval, 29691da177e4SLinus Torvalds .maxlen = sizeof(int), 29701da177e4SLinus Torvalds .mode = 0644, 29716d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_ms_jiffies, 29721da177e4SLinus Torvalds }, 29731da177e4SLinus Torvalds { 29741da177e4SLinus Torvalds .procname = "gc_timeout", 29751da177e4SLinus Torvalds .data = &ip_rt_gc_timeout, 29761da177e4SLinus Torvalds .maxlen = sizeof(int), 29771da177e4SLinus Torvalds .mode = 0644, 29786d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_jiffies, 29791da177e4SLinus Torvalds }, 29801da177e4SLinus Torvalds { 29819f28a2fcSEric Dumazet .procname = "gc_interval", 29829f28a2fcSEric Dumazet .data = &ip_rt_gc_interval, 29839f28a2fcSEric Dumazet .maxlen = sizeof(int), 29849f28a2fcSEric Dumazet .mode = 0644, 29859f28a2fcSEric Dumazet .proc_handler = proc_dointvec_jiffies, 29869f28a2fcSEric Dumazet }, 29879f28a2fcSEric Dumazet { 29881da177e4SLinus Torvalds .procname = "redirect_load", 29891da177e4SLinus Torvalds .data = &ip_rt_redirect_load, 29901da177e4SLinus Torvalds .maxlen = sizeof(int), 29911da177e4SLinus Torvalds .mode = 0644, 29926d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 29931da177e4SLinus Torvalds }, 29941da177e4SLinus Torvalds { 29951da177e4SLinus Torvalds .procname = "redirect_number", 29961da177e4SLinus Torvalds .data = &ip_rt_redirect_number, 29971da177e4SLinus Torvalds .maxlen = sizeof(int), 29981da177e4SLinus Torvalds .mode = 0644, 29996d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 30001da177e4SLinus Torvalds }, 30011da177e4SLinus Torvalds { 30021da177e4SLinus Torvalds .procname = "redirect_silence", 30031da177e4SLinus Torvalds .data = &ip_rt_redirect_silence, 30041da177e4SLinus Torvalds .maxlen = sizeof(int), 30051da177e4SLinus Torvalds .mode = 0644, 30066d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 30071da177e4SLinus Torvalds }, 30081da177e4SLinus Torvalds { 30091da177e4SLinus Torvalds .procname = "error_cost", 30101da177e4SLinus Torvalds .data = &ip_rt_error_cost, 30111da177e4SLinus Torvalds .maxlen = sizeof(int), 30121da177e4SLinus Torvalds .mode = 0644, 30136d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 30141da177e4SLinus Torvalds }, 30151da177e4SLinus Torvalds { 30161da177e4SLinus Torvalds .procname = "error_burst", 30171da177e4SLinus Torvalds .data = &ip_rt_error_burst, 30181da177e4SLinus Torvalds .maxlen = sizeof(int), 30191da177e4SLinus Torvalds .mode = 0644, 30206d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 30211da177e4SLinus Torvalds }, 30221da177e4SLinus Torvalds { 30231da177e4SLinus Torvalds .procname = "gc_elasticity", 30241da177e4SLinus Torvalds .data = &ip_rt_gc_elasticity, 30251da177e4SLinus Torvalds .maxlen = sizeof(int), 30261da177e4SLinus Torvalds .mode = 0644, 30276d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 30281da177e4SLinus Torvalds }, 30291da177e4SLinus Torvalds { 30301da177e4SLinus Torvalds .procname = "mtu_expires", 30311da177e4SLinus Torvalds .data = &ip_rt_mtu_expires, 30321da177e4SLinus Torvalds .maxlen = sizeof(int), 30331da177e4SLinus Torvalds .mode = 0644, 30346d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_jiffies, 30351da177e4SLinus Torvalds }, 30361da177e4SLinus Torvalds { 30371da177e4SLinus Torvalds .procname = "min_pmtu", 30381da177e4SLinus Torvalds .data = &ip_rt_min_pmtu, 30391da177e4SLinus Torvalds .maxlen = sizeof(int), 30401da177e4SLinus Torvalds .mode = 0644, 3041c7272c2fSSabrina Dubroca .proc_handler = proc_dointvec_minmax, 3042c7272c2fSSabrina Dubroca .extra1 = &ip_min_valid_pmtu, 30431da177e4SLinus Torvalds }, 30441da177e4SLinus Torvalds { 30451da177e4SLinus Torvalds .procname = "min_adv_mss", 30461da177e4SLinus Torvalds .data = &ip_rt_min_advmss, 30471da177e4SLinus Torvalds .maxlen = sizeof(int), 30481da177e4SLinus Torvalds .mode = 0644, 30496d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 30501da177e4SLinus Torvalds }, 3051f8572d8fSEric W. Biederman { } 30521da177e4SLinus Torvalds }; 305339a23e75SDenis V. Lunev 305439a23e75SDenis V. Lunev static struct ctl_table ipv4_route_flush_table[] = { 305539a23e75SDenis V. Lunev { 305639a23e75SDenis V. Lunev .procname = "flush", 305739a23e75SDenis V. Lunev .maxlen = sizeof(int), 305839a23e75SDenis V. Lunev .mode = 0200, 30596d9f239aSAlexey Dobriyan .proc_handler = ipv4_sysctl_rtcache_flush, 306039a23e75SDenis V. Lunev }, 3061f8572d8fSEric W. Biederman { }, 306239a23e75SDenis V. Lunev }; 306339a23e75SDenis V. Lunev 306439a23e75SDenis V. Lunev static __net_init int sysctl_route_net_init(struct net *net) 306539a23e75SDenis V. Lunev { 306639a23e75SDenis V. Lunev struct ctl_table *tbl; 306739a23e75SDenis V. Lunev 306839a23e75SDenis V. Lunev tbl = ipv4_route_flush_table; 306909ad9bc7SOctavian Purdila if (!net_eq(net, &init_net)) { 307039a23e75SDenis V. Lunev tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); 307151456b29SIan Morris if (!tbl) 307239a23e75SDenis V. Lunev goto err_dup; 3073464dc801SEric W. Biederman 3074464dc801SEric W. Biederman /* Don't export sysctls to unprivileged users */ 3075464dc801SEric W. Biederman if (net->user_ns != &init_user_ns) 3076464dc801SEric W. Biederman tbl[0].procname = NULL; 307739a23e75SDenis V. Lunev } 307839a23e75SDenis V. Lunev tbl[0].extra1 = net; 307939a23e75SDenis V. Lunev 3080ec8f23ceSEric W. Biederman net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl); 308151456b29SIan Morris if (!net->ipv4.route_hdr) 308239a23e75SDenis V. Lunev goto err_reg; 308339a23e75SDenis V. Lunev return 0; 308439a23e75SDenis V. Lunev 308539a23e75SDenis V. Lunev err_reg: 308639a23e75SDenis V. Lunev if (tbl != ipv4_route_flush_table) 308739a23e75SDenis V. Lunev kfree(tbl); 308839a23e75SDenis V. Lunev err_dup: 308939a23e75SDenis V. Lunev return -ENOMEM; 309039a23e75SDenis V. Lunev } 309139a23e75SDenis V. Lunev 309239a23e75SDenis V. Lunev static __net_exit void sysctl_route_net_exit(struct net *net) 309339a23e75SDenis V. Lunev { 309439a23e75SDenis V. Lunev struct ctl_table *tbl; 309539a23e75SDenis V. Lunev 309639a23e75SDenis V. Lunev tbl = net->ipv4.route_hdr->ctl_table_arg; 309739a23e75SDenis V. Lunev unregister_net_sysctl_table(net->ipv4.route_hdr); 309839a23e75SDenis V. Lunev BUG_ON(tbl == ipv4_route_flush_table); 309939a23e75SDenis V. Lunev kfree(tbl); 310039a23e75SDenis V. Lunev } 310139a23e75SDenis V. Lunev 310239a23e75SDenis V. Lunev static __net_initdata struct pernet_operations sysctl_route_ops = { 310339a23e75SDenis V. Lunev .init = sysctl_route_net_init, 310439a23e75SDenis V. Lunev .exit = sysctl_route_net_exit, 310539a23e75SDenis V. Lunev }; 31061da177e4SLinus Torvalds #endif 31071da177e4SLinus Torvalds 31083ee94372SNeil Horman static __net_init int rt_genid_init(struct net *net) 31099f5e97e5SDenis V. Lunev { 3110ca4c3fc2Sfan.du atomic_set(&net->ipv4.rt_genid, 0); 31115aad1de5STimo Teräs atomic_set(&net->fnhe_genid, 0); 31127aed9f72SJason A. Donenfeld atomic_set(&net->ipv4.dev_addr_genid, get_random_int()); 31139f5e97e5SDenis V. Lunev return 0; 31149f5e97e5SDenis V. Lunev } 31159f5e97e5SDenis V. Lunev 31163ee94372SNeil Horman static __net_initdata struct pernet_operations rt_genid_ops = { 31173ee94372SNeil Horman .init = rt_genid_init, 31189f5e97e5SDenis V. Lunev }; 31199f5e97e5SDenis V. Lunev 3120c3426b47SDavid S. Miller static int __net_init ipv4_inetpeer_init(struct net *net) 3121c3426b47SDavid S. Miller { 3122c3426b47SDavid S. Miller struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 3123c3426b47SDavid S. Miller 3124c3426b47SDavid S. Miller if (!bp) 3125c3426b47SDavid S. Miller return -ENOMEM; 3126c3426b47SDavid S. Miller inet_peer_base_init(bp); 3127c3426b47SDavid S. Miller net->ipv4.peers = bp; 3128c3426b47SDavid S. Miller return 0; 3129c3426b47SDavid S. Miller } 3130c3426b47SDavid S. Miller 3131c3426b47SDavid S. Miller static void __net_exit ipv4_inetpeer_exit(struct net *net) 3132c3426b47SDavid S. Miller { 3133c3426b47SDavid S. Miller struct inet_peer_base *bp = net->ipv4.peers; 3134c3426b47SDavid S. Miller 3135c3426b47SDavid S. Miller net->ipv4.peers = NULL; 313656a6b248SDavid S. Miller inetpeer_invalidate_tree(bp); 3137c3426b47SDavid S. Miller kfree(bp); 3138c3426b47SDavid S. Miller } 3139c3426b47SDavid S. Miller 3140c3426b47SDavid S. Miller static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { 3141c3426b47SDavid S. Miller .init = ipv4_inetpeer_init, 3142c3426b47SDavid S. Miller .exit = ipv4_inetpeer_exit, 3143c3426b47SDavid S. Miller }; 31449f5e97e5SDenis V. Lunev 3145c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 31467d720c3eSTejun Heo struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; 3147c7066f70SPatrick McHardy #endif /* CONFIG_IP_ROUTE_CLASSID */ 31481da177e4SLinus Torvalds 31491da177e4SLinus Torvalds int __init ip_rt_init(void) 31501da177e4SLinus Torvalds { 31515055c371SEric Dumazet int cpu; 31521da177e4SLinus Torvalds 31536da2ec56SKees Cook ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents), 31546da2ec56SKees Cook GFP_KERNEL); 315573f156a6SEric Dumazet if (!ip_idents) 315673f156a6SEric Dumazet panic("IP: failed to allocate ip_idents\n"); 315773f156a6SEric Dumazet 315873f156a6SEric Dumazet prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); 315973f156a6SEric Dumazet 3160355b590cSEric Dumazet ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL); 3161355b590cSEric Dumazet if (!ip_tstamps) 3162355b590cSEric Dumazet panic("IP: failed to allocate ip_tstamps\n"); 3163355b590cSEric Dumazet 31645055c371SEric Dumazet for_each_possible_cpu(cpu) { 31655055c371SEric Dumazet struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); 31665055c371SEric Dumazet 31675055c371SEric Dumazet INIT_LIST_HEAD(&ul->head); 31685055c371SEric Dumazet spin_lock_init(&ul->lock); 31695055c371SEric Dumazet } 3170c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 31710dcec8c2SIngo Molnar ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); 31721da177e4SLinus Torvalds if (!ip_rt_acct) 31731da177e4SLinus Torvalds panic("IP: failed to allocate ip_rt_acct\n"); 31741da177e4SLinus Torvalds #endif 31751da177e4SLinus Torvalds 3176e5d679f3SAlexey Dobriyan ipv4_dst_ops.kmem_cachep = 3177e5d679f3SAlexey Dobriyan kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, 317820c2df83SPaul Mundt SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 31791da177e4SLinus Torvalds 318014e50e57SDavid S. Miller ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; 318114e50e57SDavid S. Miller 3182fc66f95cSEric Dumazet if (dst_entries_init(&ipv4_dst_ops) < 0) 3183fc66f95cSEric Dumazet panic("IP: failed to allocate ipv4_dst_ops counter\n"); 3184fc66f95cSEric Dumazet 3185fc66f95cSEric Dumazet if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) 3186fc66f95cSEric Dumazet panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); 3187fc66f95cSEric Dumazet 318889aef892SDavid S. Miller ipv4_dst_ops.gc_thresh = ~0; 318989aef892SDavid S. Miller ip_rt_max_size = INT_MAX; 31901da177e4SLinus Torvalds 31911da177e4SLinus Torvalds devinet_init(); 31921da177e4SLinus Torvalds ip_fib_init(); 31931da177e4SLinus Torvalds 319473b38711SDenis V. Lunev if (ip_rt_proc_init()) 3195058bd4d2SJoe Perches pr_err("Unable to create route proc files\n"); 31961da177e4SLinus Torvalds #ifdef CONFIG_XFRM 31971da177e4SLinus Torvalds xfrm_init(); 3198703fb94eSSteffen Klassert xfrm4_init(); 31991da177e4SLinus Torvalds #endif 3200394f51abSFlorian Westphal rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, 3201394f51abSFlorian Westphal RTNL_FLAG_DOIT_UNLOCKED); 320263f3444fSThomas Graf 320339a23e75SDenis V. Lunev #ifdef CONFIG_SYSCTL 320439a23e75SDenis V. Lunev register_pernet_subsys(&sysctl_route_ops); 320539a23e75SDenis V. Lunev #endif 32063ee94372SNeil Horman register_pernet_subsys(&rt_genid_ops); 3207c3426b47SDavid S. Miller register_pernet_subsys(&ipv4_inetpeer_ops); 32081bcdca3fSTim Hansen return 0; 32091da177e4SLinus Torvalds } 32101da177e4SLinus Torvalds 3211a1bc6eb4SAl Viro #ifdef CONFIG_SYSCTL 3212eeb61f71SAl Viro /* 3213eeb61f71SAl Viro * We really need to sanitize the damn ipv4 init order, then all 3214eeb61f71SAl Viro * this nonsense will go away. 3215eeb61f71SAl Viro */ 3216eeb61f71SAl Viro void __init ip_static_sysctl_init(void) 3217eeb61f71SAl Viro { 32184e5ca785SEric W. Biederman register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table); 3219eeb61f71SAl Viro } 3220a1bc6eb4SAl Viro #endif 3221