12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 21da177e4SLinus Torvalds /* 31da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 41da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 51da177e4SLinus Torvalds * interface as the means of communication with the user level. 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * ROUTE - implementation of the IP router. 81da177e4SLinus Torvalds * 902c30a84SJesper Juhl * Authors: Ross Biro 101da177e4SLinus Torvalds * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 111da177e4SLinus Torvalds * Alan Cox, <gw4pts@gw4pts.ampr.org> 121da177e4SLinus Torvalds * Linus Torvalds, <Linus.Torvalds@helsinki.fi> 131da177e4SLinus Torvalds * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 141da177e4SLinus Torvalds * 151da177e4SLinus Torvalds * Fixes: 161da177e4SLinus Torvalds * Alan Cox : Verify area fixes. 171da177e4SLinus Torvalds * Alan Cox : cli() protects routing changes 181da177e4SLinus Torvalds * Rui Oliveira : ICMP routing table updates 191da177e4SLinus Torvalds * (rco@di.uminho.pt) Routing table insertion and update 201da177e4SLinus Torvalds * Linus Torvalds : Rewrote bits to be sensible 211da177e4SLinus Torvalds * Alan Cox : Added BSD route gw semantics 221da177e4SLinus Torvalds * Alan Cox : Super /proc >4K 231da177e4SLinus Torvalds * Alan Cox : MTU in route table 241da177e4SLinus Torvalds * Alan Cox : MSS actually. Also added the window 251da177e4SLinus Torvalds * clamper. 261da177e4SLinus Torvalds * Sam Lantinga : Fixed route matching in rt_del() 271da177e4SLinus Torvalds * Alan Cox : Routing cache support. 281da177e4SLinus Torvalds * Alan Cox : Removed compatibility cruft. 291da177e4SLinus Torvalds * Alan Cox : RTF_REJECT support. 301da177e4SLinus Torvalds * Alan Cox : TCP irtt support. 311da177e4SLinus Torvalds * Jonathan Naylor : Added Metric support. 321da177e4SLinus Torvalds * Miquel van Smoorenburg : BSD API fixes. 331da177e4SLinus Torvalds * Miquel van Smoorenburg : Metrics. 341da177e4SLinus Torvalds * Alan Cox : Use __u32 properly 351da177e4SLinus Torvalds * Alan Cox : Aligned routing errors more closely with BSD 361da177e4SLinus Torvalds * our system is still very different. 371da177e4SLinus Torvalds * Alan Cox : Faster /proc handling 381da177e4SLinus Torvalds * Alexey Kuznetsov : Massive rework to support tree based routing, 391da177e4SLinus Torvalds * routing caches and better behaviour. 401da177e4SLinus Torvalds * 411da177e4SLinus Torvalds * Olaf Erb : irtt wasn't being copied right. 421da177e4SLinus Torvalds * Bjorn Ekwall : Kerneld route support. 431da177e4SLinus Torvalds * Alan Cox : Multicast fixed (I hope) 441da177e4SLinus Torvalds * Pavel Krauz : Limited broadcast fixed 451da177e4SLinus Torvalds * Mike McLagan : Routing by source 461da177e4SLinus Torvalds * Alexey Kuznetsov : End of old history. Split to fib.c and 471da177e4SLinus Torvalds * route.c and rewritten from scratch. 481da177e4SLinus Torvalds * Andi Kleen : Load-limit warning messages. 491da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year coma. 501da177e4SLinus Torvalds * Vitaly E. Lavrov : Race condition in ip_route_input_slow. 511da177e4SLinus Torvalds * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow. 521da177e4SLinus Torvalds * Vladimir V. Ivanov : IP rule info (flowid) is really useful. 531da177e4SLinus Torvalds * Marc Boucher : routing by fwmark 541da177e4SLinus Torvalds * Robert Olsson : Added rt_cache statistics 551da177e4SLinus Torvalds * Arnaldo C. Melo : Convert proc stuff to seq_file 56bb1d23b0SEric Dumazet * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. 57cef2685eSIlia Sotnikov * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect 58cef2685eSIlia Sotnikov * Ilia Sotnikov : Removed TOS from hash calculations 591da177e4SLinus Torvalds */ 601da177e4SLinus Torvalds 61afd46503SJoe Perches #define pr_fmt(fmt) "IPv4: " fmt 62afd46503SJoe Perches 631da177e4SLinus Torvalds #include <linux/module.h> 647c0f6ba6SLinus Torvalds #include <linux/uaccess.h> 651da177e4SLinus Torvalds #include <linux/bitops.h> 661da177e4SLinus Torvalds #include <linux/types.h> 671da177e4SLinus Torvalds #include <linux/kernel.h> 681da177e4SLinus Torvalds #include <linux/mm.h> 691da177e4SLinus Torvalds #include <linux/string.h> 701da177e4SLinus Torvalds #include <linux/socket.h> 711da177e4SLinus Torvalds #include <linux/sockios.h> 721da177e4SLinus Torvalds #include <linux/errno.h> 731da177e4SLinus Torvalds #include <linux/in.h> 741da177e4SLinus Torvalds #include <linux/inet.h> 751da177e4SLinus Torvalds #include <linux/netdevice.h> 761da177e4SLinus Torvalds #include <linux/proc_fs.h> 771da177e4SLinus Torvalds #include <linux/init.h> 781da177e4SLinus Torvalds #include <linux/skbuff.h> 791da177e4SLinus Torvalds #include <linux/inetdevice.h> 801da177e4SLinus Torvalds #include <linux/igmp.h> 811da177e4SLinus Torvalds #include <linux/pkt_sched.h> 821da177e4SLinus Torvalds #include <linux/mroute.h> 831da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h> 841da177e4SLinus Torvalds #include <linux/random.h> 851da177e4SLinus Torvalds #include <linux/rcupdate.h> 861da177e4SLinus Torvalds #include <linux/times.h> 875a0e3ad6STejun Heo #include <linux/slab.h> 8873f156a6SEric Dumazet #include <linux/jhash.h> 89352e512cSHerbert Xu #include <net/dst.h> 901b7179d3SThomas Graf #include <net/dst_metadata.h> 91457c4cbcSEric W. Biederman #include <net/net_namespace.h> 921da177e4SLinus Torvalds #include <net/protocol.h> 931da177e4SLinus Torvalds #include <net/ip.h> 941da177e4SLinus Torvalds #include <net/route.h> 951da177e4SLinus Torvalds #include <net/inetpeer.h> 961da177e4SLinus Torvalds #include <net/sock.h> 971da177e4SLinus Torvalds #include <net/ip_fib.h> 985481d73fSDavid Ahern #include <net/nexthop.h> 991da177e4SLinus Torvalds #include <net/arp.h> 1001da177e4SLinus Torvalds #include <net/tcp.h> 1011da177e4SLinus Torvalds #include <net/icmp.h> 1021da177e4SLinus Torvalds #include <net/xfrm.h> 103571e7226SRoopa Prabhu #include <net/lwtunnel.h> 1048d71740cSTom Tucker #include <net/netevent.h> 10563f3444fSThomas Graf #include <net/rtnetlink.h> 1061da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL 1071da177e4SLinus Torvalds #include <linux/sysctl.h> 1081da177e4SLinus Torvalds #endif 1096e5714eaSDavid S. Miller #include <net/secure_seq.h> 1101b7179d3SThomas Graf #include <net/ip_tunnels.h> 111385add90SDavid Ahern #include <net/l3mdev.h> 1121da177e4SLinus Torvalds 113b6179813SRoopa Prabhu #include "fib_lookup.h" 114b6179813SRoopa Prabhu 11568a5e3ddSDavid S. Miller #define RT_FL_TOS(oldflp4) \ 116f61759e6SJulian Anastasov ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) 1171da177e4SLinus Torvalds 1181da177e4SLinus Torvalds #define RT_GC_TIMEOUT (300*HZ) 1191da177e4SLinus Torvalds 1201da177e4SLinus Torvalds static int ip_rt_max_size; 121817bc4dbSStephen Hemminger static int ip_rt_redirect_number __read_mostly = 9; 122817bc4dbSStephen Hemminger static int ip_rt_redirect_load __read_mostly = HZ / 50; 123817bc4dbSStephen Hemminger static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); 124817bc4dbSStephen Hemminger static int ip_rt_error_cost __read_mostly = HZ; 125817bc4dbSStephen Hemminger static int ip_rt_error_burst __read_mostly = 5 * HZ; 126817bc4dbSStephen Hemminger static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; 127c7272c2fSSabrina Dubroca static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; 128817bc4dbSStephen Hemminger static int ip_rt_min_advmss __read_mostly = 256; 1299f28a2fcSEric Dumazet 130deed49dfSXin Long static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; 131c7272c2fSSabrina Dubroca 1321da177e4SLinus Torvalds /* 1331da177e4SLinus Torvalds * Interface to generic destination cache. 1341da177e4SLinus Torvalds */ 1351da177e4SLinus Torvalds 1361da177e4SLinus Torvalds static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); 1370dbaee3bSDavid S. Miller static unsigned int ipv4_default_advmss(const struct dst_entry *dst); 138ebb762f2SSteffen Klassert static unsigned int ipv4_mtu(const struct dst_entry *dst); 1391da177e4SLinus Torvalds static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); 1401da177e4SLinus Torvalds static void ipv4_link_failure(struct sk_buff *skb); 1416700c270SDavid S. Miller static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 1426700c270SDavid S. Miller struct sk_buff *skb, u32 mtu); 1436700c270SDavid S. Miller static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, 1446700c270SDavid S. Miller struct sk_buff *skb); 145caacf05eSDavid S. Miller static void ipv4_dst_destroy(struct dst_entry *dst); 1461da177e4SLinus Torvalds 14762fa8a84SDavid S. Miller static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) 14862fa8a84SDavid S. Miller { 14931248731SDavid S. Miller WARN_ON(1); 15031248731SDavid S. Miller return NULL; 15162fa8a84SDavid S. Miller } 15262fa8a84SDavid S. Miller 153f894cbf8SDavid S. Miller static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, 154f894cbf8SDavid S. Miller struct sk_buff *skb, 155f894cbf8SDavid S. Miller const void *daddr); 15663fca65dSJulian Anastasov static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr); 157d3aaeb38SDavid S. Miller 1581da177e4SLinus Torvalds static struct dst_ops ipv4_dst_ops = { 1591da177e4SLinus Torvalds .family = AF_INET, 1601da177e4SLinus Torvalds .check = ipv4_dst_check, 1610dbaee3bSDavid S. Miller .default_advmss = ipv4_default_advmss, 162ebb762f2SSteffen Klassert .mtu = ipv4_mtu, 16362fa8a84SDavid S. Miller .cow_metrics = ipv4_cow_metrics, 164caacf05eSDavid S. Miller .destroy = ipv4_dst_destroy, 1651da177e4SLinus Torvalds .negative_advice = ipv4_negative_advice, 1661da177e4SLinus Torvalds .link_failure = ipv4_link_failure, 1671da177e4SLinus Torvalds .update_pmtu = ip_rt_update_pmtu, 168e47a185bSDavid S. Miller .redirect = ip_do_redirect, 169b92dacd4SEric W. Biederman .local_out = __ip_local_out, 170d3aaeb38SDavid S. Miller .neigh_lookup = ipv4_neigh_lookup, 17163fca65dSJulian Anastasov .confirm_neigh = ipv4_confirm_neigh, 1721da177e4SLinus Torvalds }; 1731da177e4SLinus Torvalds 1741da177e4SLinus Torvalds #define ECN_OR_COST(class) TC_PRIO_##class 1751da177e4SLinus Torvalds 1764839c52bSPhilippe De Muyter const __u8 ip_tos2prio[16] = { 1771da177e4SLinus Torvalds TC_PRIO_BESTEFFORT, 1784a2b9c37SDan Siemon ECN_OR_COST(BESTEFFORT), 1791da177e4SLinus Torvalds TC_PRIO_BESTEFFORT, 1801da177e4SLinus Torvalds ECN_OR_COST(BESTEFFORT), 1811da177e4SLinus Torvalds TC_PRIO_BULK, 1821da177e4SLinus Torvalds ECN_OR_COST(BULK), 1831da177e4SLinus Torvalds TC_PRIO_BULK, 1841da177e4SLinus Torvalds ECN_OR_COST(BULK), 1851da177e4SLinus Torvalds TC_PRIO_INTERACTIVE, 1861da177e4SLinus Torvalds ECN_OR_COST(INTERACTIVE), 1871da177e4SLinus Torvalds TC_PRIO_INTERACTIVE, 1881da177e4SLinus Torvalds ECN_OR_COST(INTERACTIVE), 1891da177e4SLinus Torvalds TC_PRIO_INTERACTIVE_BULK, 1901da177e4SLinus Torvalds ECN_OR_COST(INTERACTIVE_BULK), 1911da177e4SLinus Torvalds TC_PRIO_INTERACTIVE_BULK, 1921da177e4SLinus Torvalds ECN_OR_COST(INTERACTIVE_BULK) 1931da177e4SLinus Torvalds }; 194d4a96865SAmir Vadai EXPORT_SYMBOL(ip_tos2prio); 1951da177e4SLinus Torvalds 1962f970d83SEric Dumazet static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 1973ed66e91SChristoph Lameter #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) 1981da177e4SLinus Torvalds 1991da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 2001da177e4SLinus Torvalds static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 2011da177e4SLinus Torvalds { 20229e75252SEric Dumazet if (*pos) 20389aef892SDavid S. Miller return NULL; 20429e75252SEric Dumazet return SEQ_START_TOKEN; 2051da177e4SLinus Torvalds } 2061da177e4SLinus Torvalds 2071da177e4SLinus Torvalds static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2081da177e4SLinus Torvalds { 2091da177e4SLinus Torvalds ++*pos; 21089aef892SDavid S. Miller return NULL; 2111da177e4SLinus Torvalds } 2121da177e4SLinus Torvalds 2131da177e4SLinus Torvalds static void rt_cache_seq_stop(struct seq_file *seq, void *v) 2141da177e4SLinus Torvalds { 2151da177e4SLinus Torvalds } 2161da177e4SLinus Torvalds 2171da177e4SLinus Torvalds static int rt_cache_seq_show(struct seq_file *seq, void *v) 2181da177e4SLinus Torvalds { 2191da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) 2201da177e4SLinus Torvalds seq_printf(seq, "%-127s\n", 2211da177e4SLinus Torvalds "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" 2221da177e4SLinus Torvalds "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" 2231da177e4SLinus Torvalds "HHUptod\tSpecDst"); 2241da177e4SLinus Torvalds return 0; 2251da177e4SLinus Torvalds } 2261da177e4SLinus Torvalds 227f690808eSStephen Hemminger static const struct seq_operations rt_cache_seq_ops = { 2281da177e4SLinus Torvalds .start = rt_cache_seq_start, 2291da177e4SLinus Torvalds .next = rt_cache_seq_next, 2301da177e4SLinus Torvalds .stop = rt_cache_seq_stop, 2311da177e4SLinus Torvalds .show = rt_cache_seq_show, 2321da177e4SLinus Torvalds }; 2331da177e4SLinus Torvalds 2341da177e4SLinus Torvalds static int rt_cache_seq_open(struct inode *inode, struct file *file) 2351da177e4SLinus Torvalds { 23689aef892SDavid S. Miller return seq_open(file, &rt_cache_seq_ops); 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds 2399a32144eSArjan van de Ven static const struct file_operations rt_cache_seq_fops = { 2401da177e4SLinus Torvalds .open = rt_cache_seq_open, 2411da177e4SLinus Torvalds .read = seq_read, 2421da177e4SLinus Torvalds .llseek = seq_lseek, 24389aef892SDavid S. Miller .release = seq_release, 2441da177e4SLinus Torvalds }; 2451da177e4SLinus Torvalds 2461da177e4SLinus Torvalds 2471da177e4SLinus Torvalds static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) 2481da177e4SLinus Torvalds { 2491da177e4SLinus Torvalds int cpu; 2501da177e4SLinus Torvalds 2511da177e4SLinus Torvalds if (*pos == 0) 2521da177e4SLinus Torvalds return SEQ_START_TOKEN; 2531da177e4SLinus Torvalds 2540f23174aSRusty Russell for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { 2551da177e4SLinus Torvalds if (!cpu_possible(cpu)) 2561da177e4SLinus Torvalds continue; 2571da177e4SLinus Torvalds *pos = cpu+1; 2582f970d83SEric Dumazet return &per_cpu(rt_cache_stat, cpu); 2591da177e4SLinus Torvalds } 2601da177e4SLinus Torvalds return NULL; 2611da177e4SLinus Torvalds } 2621da177e4SLinus Torvalds 2631da177e4SLinus Torvalds static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2641da177e4SLinus Torvalds { 2651da177e4SLinus Torvalds int cpu; 2661da177e4SLinus Torvalds 2670f23174aSRusty Russell for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 2681da177e4SLinus Torvalds if (!cpu_possible(cpu)) 2691da177e4SLinus Torvalds continue; 2701da177e4SLinus Torvalds *pos = cpu+1; 2712f970d83SEric Dumazet return &per_cpu(rt_cache_stat, cpu); 2721da177e4SLinus Torvalds } 2731da177e4SLinus Torvalds return NULL; 2741da177e4SLinus Torvalds 2751da177e4SLinus Torvalds } 2761da177e4SLinus Torvalds 2771da177e4SLinus Torvalds static void rt_cpu_seq_stop(struct seq_file *seq, void *v) 2781da177e4SLinus Torvalds { 2791da177e4SLinus Torvalds 2801da177e4SLinus Torvalds } 2811da177e4SLinus Torvalds 2821da177e4SLinus Torvalds static int rt_cpu_seq_show(struct seq_file *seq, void *v) 2831da177e4SLinus Torvalds { 2841da177e4SLinus Torvalds struct rt_cache_stat *st = v; 2851da177e4SLinus Torvalds 2861da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 2875bec0039SOlaf Rempel seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); 2881da177e4SLinus Torvalds return 0; 2891da177e4SLinus Torvalds } 2901da177e4SLinus Torvalds 2911da177e4SLinus Torvalds seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " 2921da177e4SLinus Torvalds " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", 293fc66f95cSEric Dumazet dst_entries_get_slow(&ipv4_dst_ops), 2940baf2b35SEric Dumazet 0, /* st->in_hit */ 2951da177e4SLinus Torvalds st->in_slow_tot, 2961da177e4SLinus Torvalds st->in_slow_mc, 2971da177e4SLinus Torvalds st->in_no_route, 2981da177e4SLinus Torvalds st->in_brd, 2991da177e4SLinus Torvalds st->in_martian_dst, 3001da177e4SLinus Torvalds st->in_martian_src, 3011da177e4SLinus Torvalds 3020baf2b35SEric Dumazet 0, /* st->out_hit */ 3031da177e4SLinus Torvalds st->out_slow_tot, 3041da177e4SLinus Torvalds st->out_slow_mc, 3051da177e4SLinus Torvalds 3060baf2b35SEric Dumazet 0, /* st->gc_total */ 3070baf2b35SEric Dumazet 0, /* st->gc_ignored */ 3080baf2b35SEric Dumazet 0, /* st->gc_goal_miss */ 3090baf2b35SEric Dumazet 0, /* st->gc_dst_overflow */ 3100baf2b35SEric Dumazet 0, /* st->in_hlist_search */ 3110baf2b35SEric Dumazet 0 /* st->out_hlist_search */ 3121da177e4SLinus Torvalds ); 3131da177e4SLinus Torvalds return 0; 3141da177e4SLinus Torvalds } 3151da177e4SLinus Torvalds 316f690808eSStephen Hemminger static const struct seq_operations rt_cpu_seq_ops = { 3171da177e4SLinus Torvalds .start = rt_cpu_seq_start, 3181da177e4SLinus Torvalds .next = rt_cpu_seq_next, 3191da177e4SLinus Torvalds .stop = rt_cpu_seq_stop, 3201da177e4SLinus Torvalds .show = rt_cpu_seq_show, 3211da177e4SLinus Torvalds }; 3221da177e4SLinus Torvalds 3231da177e4SLinus Torvalds 3241da177e4SLinus Torvalds static int rt_cpu_seq_open(struct inode *inode, struct file *file) 3251da177e4SLinus Torvalds { 3261da177e4SLinus Torvalds return seq_open(file, &rt_cpu_seq_ops); 3271da177e4SLinus Torvalds } 3281da177e4SLinus Torvalds 3299a32144eSArjan van de Ven static const struct file_operations rt_cpu_seq_fops = { 3301da177e4SLinus Torvalds .open = rt_cpu_seq_open, 3311da177e4SLinus Torvalds .read = seq_read, 3321da177e4SLinus Torvalds .llseek = seq_lseek, 3331da177e4SLinus Torvalds .release = seq_release, 3341da177e4SLinus Torvalds }; 3351da177e4SLinus Torvalds 336c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 337a661c419SAlexey Dobriyan static int rt_acct_proc_show(struct seq_file *m, void *v) 33878c686e9SPavel Emelyanov { 339a661c419SAlexey Dobriyan struct ip_rt_acct *dst, *src; 340a661c419SAlexey Dobriyan unsigned int i, j; 34178c686e9SPavel Emelyanov 342a661c419SAlexey Dobriyan dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL); 343a661c419SAlexey Dobriyan if (!dst) 344a661c419SAlexey Dobriyan return -ENOMEM; 34578c686e9SPavel Emelyanov 346a661c419SAlexey Dobriyan for_each_possible_cpu(i) { 347a661c419SAlexey Dobriyan src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); 348a661c419SAlexey Dobriyan for (j = 0; j < 256; j++) { 349a661c419SAlexey Dobriyan dst[j].o_bytes += src[j].o_bytes; 350a661c419SAlexey Dobriyan dst[j].o_packets += src[j].o_packets; 351a661c419SAlexey Dobriyan dst[j].i_bytes += src[j].i_bytes; 352a661c419SAlexey Dobriyan dst[j].i_packets += src[j].i_packets; 353a661c419SAlexey Dobriyan } 354a661c419SAlexey Dobriyan } 355a661c419SAlexey Dobriyan 356a661c419SAlexey Dobriyan seq_write(m, dst, 256 * sizeof(struct ip_rt_acct)); 357a661c419SAlexey Dobriyan kfree(dst); 35878c686e9SPavel Emelyanov return 0; 35978c686e9SPavel Emelyanov } 36078c686e9SPavel Emelyanov #endif 361107f1634SPavel Emelyanov 36273b38711SDenis V. Lunev static int __net_init ip_rt_do_proc_init(struct net *net) 363107f1634SPavel Emelyanov { 364107f1634SPavel Emelyanov struct proc_dir_entry *pde; 365107f1634SPavel Emelyanov 366d6444062SJoe Perches pde = proc_create("rt_cache", 0444, net->proc_net, 367107f1634SPavel Emelyanov &rt_cache_seq_fops); 368107f1634SPavel Emelyanov if (!pde) 369107f1634SPavel Emelyanov goto err1; 370107f1634SPavel Emelyanov 371d6444062SJoe Perches pde = proc_create("rt_cache", 0444, 37277020720SWang Chen net->proc_net_stat, &rt_cpu_seq_fops); 373107f1634SPavel Emelyanov if (!pde) 374107f1634SPavel Emelyanov goto err2; 375107f1634SPavel Emelyanov 376c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 3773f3942acSChristoph Hellwig pde = proc_create_single("rt_acct", 0, net->proc_net, 3783f3942acSChristoph Hellwig rt_acct_proc_show); 379107f1634SPavel Emelyanov if (!pde) 380107f1634SPavel Emelyanov goto err3; 381107f1634SPavel Emelyanov #endif 382107f1634SPavel Emelyanov return 0; 383107f1634SPavel Emelyanov 384c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 385107f1634SPavel Emelyanov err3: 386107f1634SPavel Emelyanov remove_proc_entry("rt_cache", net->proc_net_stat); 387107f1634SPavel Emelyanov #endif 388107f1634SPavel Emelyanov err2: 389107f1634SPavel Emelyanov remove_proc_entry("rt_cache", net->proc_net); 390107f1634SPavel Emelyanov err1: 391107f1634SPavel Emelyanov return -ENOMEM; 392107f1634SPavel Emelyanov } 39373b38711SDenis V. Lunev 39473b38711SDenis V. Lunev static void __net_exit ip_rt_do_proc_exit(struct net *net) 39573b38711SDenis V. Lunev { 39673b38711SDenis V. Lunev remove_proc_entry("rt_cache", net->proc_net_stat); 39773b38711SDenis V. Lunev remove_proc_entry("rt_cache", net->proc_net); 398c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 39973b38711SDenis V. Lunev remove_proc_entry("rt_acct", net->proc_net); 4000a931acfSAlexey Dobriyan #endif 40173b38711SDenis V. Lunev } 40273b38711SDenis V. Lunev 40373b38711SDenis V. Lunev static struct pernet_operations ip_rt_proc_ops __net_initdata = { 40473b38711SDenis V. Lunev .init = ip_rt_do_proc_init, 40573b38711SDenis V. Lunev .exit = ip_rt_do_proc_exit, 40673b38711SDenis V. Lunev }; 40773b38711SDenis V. Lunev 40873b38711SDenis V. Lunev static int __init ip_rt_proc_init(void) 40973b38711SDenis V. Lunev { 41073b38711SDenis V. Lunev return register_pernet_subsys(&ip_rt_proc_ops); 41173b38711SDenis V. Lunev } 41273b38711SDenis V. Lunev 413107f1634SPavel Emelyanov #else 41473b38711SDenis V. Lunev static inline int ip_rt_proc_init(void) 415107f1634SPavel Emelyanov { 416107f1634SPavel Emelyanov return 0; 417107f1634SPavel Emelyanov } 4181da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 4191da177e4SLinus Torvalds 4204331debcSEric Dumazet static inline bool rt_is_expired(const struct rtable *rth) 421e84f84f2SDenis V. Lunev { 422ca4c3fc2Sfan.du return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev)); 423e84f84f2SDenis V. Lunev } 424e84f84f2SDenis V. Lunev 4254ccfe6d4SNicolas Dichtel void rt_cache_flush(struct net *net) 42629e75252SEric Dumazet { 427ca4c3fc2Sfan.du rt_genid_bump_ipv4(net); 42898376387SEric Dumazet } 42998376387SEric Dumazet 430f894cbf8SDavid S. Miller static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, 431f894cbf8SDavid S. Miller struct sk_buff *skb, 432f894cbf8SDavid S. Miller const void *daddr) 4333769cffbSDavid Miller { 4341550c171SDavid Ahern const struct rtable *rt = container_of(dst, struct rtable, dst); 435d3aaeb38SDavid S. Miller struct net_device *dev = dst->dev; 4363769cffbSDavid Miller struct neighbour *n; 4373769cffbSDavid Miller 4385c9f7c1dSDavid Ahern rcu_read_lock_bh(); 439d3aaeb38SDavid S. Miller 4405c9f7c1dSDavid Ahern if (likely(rt->rt_gw_family == AF_INET)) { 4415c9f7c1dSDavid Ahern n = ip_neigh_gw4(dev, rt->rt_gw4); 4425c9f7c1dSDavid Ahern } else if (rt->rt_gw_family == AF_INET6) { 4435c9f7c1dSDavid Ahern n = ip_neigh_gw6(dev, &rt->rt_gw6); 4445c9f7c1dSDavid Ahern } else { 4455c9f7c1dSDavid Ahern __be32 pkey; 4465c9f7c1dSDavid Ahern 4475c9f7c1dSDavid Ahern pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr); 4485c9f7c1dSDavid Ahern n = ip_neigh_gw4(dev, pkey); 4495c9f7c1dSDavid Ahern } 4505c9f7c1dSDavid Ahern 4515c9f7c1dSDavid Ahern if (n && !refcount_inc_not_zero(&n->refcnt)) 4525c9f7c1dSDavid Ahern n = NULL; 4535c9f7c1dSDavid Ahern 4545c9f7c1dSDavid Ahern rcu_read_unlock_bh(); 4555c9f7c1dSDavid Ahern 456d3aaeb38SDavid S. Miller return n; 457d3aaeb38SDavid S. Miller } 458d3aaeb38SDavid S. Miller 45963fca65dSJulian Anastasov static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) 46063fca65dSJulian Anastasov { 4611550c171SDavid Ahern const struct rtable *rt = container_of(dst, struct rtable, dst); 46263fca65dSJulian Anastasov struct net_device *dev = dst->dev; 46363fca65dSJulian Anastasov const __be32 *pkey = daddr; 46463fca65dSJulian Anastasov 4656de9c055SDavid Ahern if (rt->rt_gw_family == AF_INET) { 4661550c171SDavid Ahern pkey = (const __be32 *)&rt->rt_gw4; 4676de9c055SDavid Ahern } else if (rt->rt_gw_family == AF_INET6) { 4686de9c055SDavid Ahern return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6); 4696de9c055SDavid Ahern } else if (!daddr || 47063fca65dSJulian Anastasov (rt->rt_flags & 4716de9c055SDavid Ahern (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) { 47263fca65dSJulian Anastasov return; 4736de9c055SDavid Ahern } 47463fca65dSJulian Anastasov __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); 47563fca65dSJulian Anastasov } 47663fca65dSJulian Anastasov 47704ca6973SEric Dumazet #define IP_IDENTS_SZ 2048u 47804ca6973SEric Dumazet 479355b590cSEric Dumazet static atomic_t *ip_idents __read_mostly; 480355b590cSEric Dumazet static u32 *ip_tstamps __read_mostly; 48104ca6973SEric Dumazet 48204ca6973SEric Dumazet /* In order to protect privacy, we add a perturbation to identifiers 48304ca6973SEric Dumazet * if one generator is seldom used. This makes hard for an attacker 48404ca6973SEric Dumazet * to infer how many packets were sent between two points in time. 48504ca6973SEric Dumazet */ 48604ca6973SEric Dumazet u32 ip_idents_reserve(u32 hash, int segs) 48704ca6973SEric Dumazet { 488355b590cSEric Dumazet u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ; 489355b590cSEric Dumazet atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; 4906aa7de05SMark Rutland u32 old = READ_ONCE(*p_tstamp); 49104ca6973SEric Dumazet u32 now = (u32)jiffies; 492adb03115SEric Dumazet u32 new, delta = 0; 49304ca6973SEric Dumazet 494355b590cSEric Dumazet if (old != now && cmpxchg(p_tstamp, old, now) == old) 49504ca6973SEric Dumazet delta = prandom_u32_max(now - old); 49604ca6973SEric Dumazet 497adb03115SEric Dumazet /* Do not use atomic_add_return() as it makes UBSAN unhappy */ 498adb03115SEric Dumazet do { 499adb03115SEric Dumazet old = (u32)atomic_read(p_id); 500adb03115SEric Dumazet new = old + delta + segs; 501adb03115SEric Dumazet } while (atomic_cmpxchg(p_id, old, new) != old); 502adb03115SEric Dumazet 503adb03115SEric Dumazet return new - segs; 50404ca6973SEric Dumazet } 50504ca6973SEric Dumazet EXPORT_SYMBOL(ip_idents_reserve); 50673f156a6SEric Dumazet 507b6a7719aSHannes Frederic Sowa void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) 5081da177e4SLinus Torvalds { 50973f156a6SEric Dumazet u32 hash, id; 5101da177e4SLinus Torvalds 511df453700SEric Dumazet /* Note the following code is not safe, but this is okay. */ 512df453700SEric Dumazet if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) 513df453700SEric Dumazet get_random_bytes(&net->ipv4.ip_id_key, 514df453700SEric Dumazet sizeof(net->ipv4.ip_id_key)); 5151da177e4SLinus Torvalds 516df453700SEric Dumazet hash = siphash_3u32((__force u32)iph->daddr, 51704ca6973SEric Dumazet (__force u32)iph->saddr, 518df453700SEric Dumazet iph->protocol, 519df453700SEric Dumazet &net->ipv4.ip_id_key); 52073f156a6SEric Dumazet id = ip_idents_reserve(hash, segs); 52173f156a6SEric Dumazet iph->id = htons(id); 5221da177e4SLinus Torvalds } 5234bc2f18bSEric Dumazet EXPORT_SYMBOL(__ip_select_ident); 5241da177e4SLinus Torvalds 525e2d118a1SLorenzo Colitti static void __build_flow_key(const struct net *net, struct flowi4 *fl4, 526e2d118a1SLorenzo Colitti const struct sock *sk, 5274895c771SDavid S. Miller const struct iphdr *iph, 5284895c771SDavid S. Miller int oif, u8 tos, 5294895c771SDavid S. Miller u8 prot, u32 mark, int flow_flags) 5304895c771SDavid S. Miller { 5314895c771SDavid S. Miller if (sk) { 5324895c771SDavid S. Miller const struct inet_sock *inet = inet_sk(sk); 5334895c771SDavid S. Miller 5344895c771SDavid S. Miller oif = sk->sk_bound_dev_if; 5354895c771SDavid S. Miller mark = sk->sk_mark; 5364895c771SDavid S. Miller tos = RT_CONN_FLAGS(sk); 5374895c771SDavid S. Miller prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; 5384895c771SDavid S. Miller } 5394895c771SDavid S. Miller flowi4_init_output(fl4, oif, mark, tos, 5404895c771SDavid S. Miller RT_SCOPE_UNIVERSE, prot, 5414895c771SDavid S. Miller flow_flags, 542e2d118a1SLorenzo Colitti iph->daddr, iph->saddr, 0, 0, 543e2d118a1SLorenzo Colitti sock_net_uid(net, sk)); 5444895c771SDavid S. Miller } 5454895c771SDavid S. Miller 5465abf7f7eSEric Dumazet static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, 5475abf7f7eSEric Dumazet const struct sock *sk) 5484895c771SDavid S. Miller { 549d109e61bSLorenzo Colitti const struct net *net = dev_net(skb->dev); 5504895c771SDavid S. Miller const struct iphdr *iph = ip_hdr(skb); 5514895c771SDavid S. Miller int oif = skb->dev->ifindex; 5524895c771SDavid S. Miller u8 tos = RT_TOS(iph->tos); 5534895c771SDavid S. Miller u8 prot = iph->protocol; 5544895c771SDavid S. Miller u32 mark = skb->mark; 5554895c771SDavid S. Miller 556d109e61bSLorenzo Colitti __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); 5574895c771SDavid S. Miller } 5584895c771SDavid S. Miller 5595abf7f7eSEric Dumazet static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) 5604895c771SDavid S. Miller { 5614895c771SDavid S. Miller const struct inet_sock *inet = inet_sk(sk); 5625abf7f7eSEric Dumazet const struct ip_options_rcu *inet_opt; 5634895c771SDavid S. Miller __be32 daddr = inet->inet_daddr; 5644895c771SDavid S. Miller 5654895c771SDavid S. Miller rcu_read_lock(); 5664895c771SDavid S. Miller inet_opt = rcu_dereference(inet->inet_opt); 5674895c771SDavid S. Miller if (inet_opt && inet_opt->opt.srr) 5684895c771SDavid S. Miller daddr = inet_opt->opt.faddr; 5694895c771SDavid S. Miller flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 5704895c771SDavid S. Miller RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 5714895c771SDavid S. Miller inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, 5724895c771SDavid S. Miller inet_sk_flowi_flags(sk), 573e2d118a1SLorenzo Colitti daddr, inet->inet_saddr, 0, 0, sk->sk_uid); 5744895c771SDavid S. Miller rcu_read_unlock(); 5754895c771SDavid S. Miller } 5764895c771SDavid S. Miller 5775abf7f7eSEric Dumazet static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, 5785abf7f7eSEric Dumazet const struct sk_buff *skb) 5794895c771SDavid S. Miller { 5804895c771SDavid S. Miller if (skb) 5814895c771SDavid S. Miller build_skb_flow_key(fl4, skb, sk); 5824895c771SDavid S. Miller else 5834895c771SDavid S. Miller build_sk_flow_key(fl4, sk); 5844895c771SDavid S. Miller } 5854895c771SDavid S. Miller 586c5038a83SDavid S. Miller static DEFINE_SPINLOCK(fnhe_lock); 5874895c771SDavid S. Miller 5882ffae99dSTimo Teräs static void fnhe_flush_routes(struct fib_nh_exception *fnhe) 5892ffae99dSTimo Teräs { 5902ffae99dSTimo Teräs struct rtable *rt; 5912ffae99dSTimo Teräs 5922ffae99dSTimo Teräs rt = rcu_dereference(fnhe->fnhe_rth_input); 5932ffae99dSTimo Teräs if (rt) { 5942ffae99dSTimo Teräs RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); 59595c47f9cSWei Wang dst_dev_put(&rt->dst); 5960830106cSWei Wang dst_release(&rt->dst); 5972ffae99dSTimo Teräs } 5982ffae99dSTimo Teräs rt = rcu_dereference(fnhe->fnhe_rth_output); 5992ffae99dSTimo Teräs if (rt) { 6002ffae99dSTimo Teräs RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); 60195c47f9cSWei Wang dst_dev_put(&rt->dst); 6020830106cSWei Wang dst_release(&rt->dst); 6032ffae99dSTimo Teräs } 6042ffae99dSTimo Teräs } 6052ffae99dSTimo Teräs 606aee06da6SJulian Anastasov static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) 6074895c771SDavid S. Miller { 6084895c771SDavid S. Miller struct fib_nh_exception *fnhe, *oldest; 6094895c771SDavid S. Miller 6104895c771SDavid S. Miller oldest = rcu_dereference(hash->chain); 6114895c771SDavid S. Miller for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; 6124895c771SDavid S. Miller fnhe = rcu_dereference(fnhe->fnhe_next)) { 6134895c771SDavid S. Miller if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) 6144895c771SDavid S. Miller oldest = fnhe; 6154895c771SDavid S. Miller } 6162ffae99dSTimo Teräs fnhe_flush_routes(oldest); 6174895c771SDavid S. Miller return oldest; 6184895c771SDavid S. Miller } 6194895c771SDavid S. Miller 620d3a25c98SDavid S. Miller static inline u32 fnhe_hashfun(__be32 daddr) 621d3a25c98SDavid S. Miller { 622d546c621SEric Dumazet static u32 fnhe_hashrnd __read_mostly; 623d3a25c98SDavid S. Miller u32 hval; 624d3a25c98SDavid S. Miller 625d546c621SEric Dumazet net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd)); 626d546c621SEric Dumazet hval = jhash_1word((__force u32) daddr, fnhe_hashrnd); 627d546c621SEric Dumazet return hash_32(hval, FNHE_HASH_SHIFT); 628d3a25c98SDavid S. Miller } 629d3a25c98SDavid S. Miller 630387aa65aSTimo Teräs static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) 631387aa65aSTimo Teräs { 632387aa65aSTimo Teräs rt->rt_pmtu = fnhe->fnhe_pmtu; 633d52e5a7eSSabrina Dubroca rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; 634387aa65aSTimo Teräs rt->dst.expires = fnhe->fnhe_expires; 635387aa65aSTimo Teräs 636387aa65aSTimo Teräs if (fnhe->fnhe_gw) { 637387aa65aSTimo Teräs rt->rt_flags |= RTCF_REDIRECTED; 6381550c171SDavid Ahern rt->rt_gw_family = AF_INET; 6391550c171SDavid Ahern rt->rt_gw4 = fnhe->fnhe_gw; 640387aa65aSTimo Teräs } 641387aa65aSTimo Teräs } 642387aa65aSTimo Teräs 643a5995e71SDavid Ahern static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, 644a5995e71SDavid Ahern __be32 gw, u32 pmtu, bool lock, 645a5995e71SDavid Ahern unsigned long expires) 6464895c771SDavid S. Miller { 647aee06da6SJulian Anastasov struct fnhe_hash_bucket *hash; 6484895c771SDavid S. Miller struct fib_nh_exception *fnhe; 649387aa65aSTimo Teräs struct rtable *rt; 650cebe84c6SXin Long u32 genid, hval; 651387aa65aSTimo Teräs unsigned int i; 6524895c771SDavid S. Miller int depth; 653cebe84c6SXin Long 654a5995e71SDavid Ahern genid = fnhe_genid(dev_net(nhc->nhc_dev)); 655cebe84c6SXin Long hval = fnhe_hashfun(daddr); 6564895c771SDavid S. Miller 657c5038a83SDavid S. Miller spin_lock_bh(&fnhe_lock); 658aee06da6SJulian Anastasov 659a5995e71SDavid Ahern hash = rcu_dereference(nhc->nhc_exceptions); 6604895c771SDavid S. Miller if (!hash) { 6616396bb22SKees Cook hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); 6624895c771SDavid S. Miller if (!hash) 663aee06da6SJulian Anastasov goto out_unlock; 664a5995e71SDavid Ahern rcu_assign_pointer(nhc->nhc_exceptions, hash); 6654895c771SDavid S. Miller } 6664895c771SDavid S. Miller 6674895c771SDavid S. Miller hash += hval; 6684895c771SDavid S. Miller 6694895c771SDavid S. Miller depth = 0; 6704895c771SDavid S. Miller for (fnhe = rcu_dereference(hash->chain); fnhe; 6714895c771SDavid S. Miller fnhe = rcu_dereference(fnhe->fnhe_next)) { 6724895c771SDavid S. Miller if (fnhe->fnhe_daddr == daddr) 673aee06da6SJulian Anastasov break; 6744895c771SDavid S. Miller depth++; 6754895c771SDavid S. Miller } 6764895c771SDavid S. Miller 677aee06da6SJulian Anastasov if (fnhe) { 678cebe84c6SXin Long if (fnhe->fnhe_genid != genid) 679cebe84c6SXin Long fnhe->fnhe_genid = genid; 680aee06da6SJulian Anastasov if (gw) 681aee06da6SJulian Anastasov fnhe->fnhe_gw = gw; 682d52e5a7eSSabrina Dubroca if (pmtu) { 683aee06da6SJulian Anastasov fnhe->fnhe_pmtu = pmtu; 684d52e5a7eSSabrina Dubroca fnhe->fnhe_mtu_locked = lock; 685d52e5a7eSSabrina Dubroca } 686387aa65aSTimo Teräs fnhe->fnhe_expires = max(1UL, expires); 687387aa65aSTimo Teräs /* Update all cached dsts too */ 6882ffae99dSTimo Teräs rt = rcu_dereference(fnhe->fnhe_rth_input); 6892ffae99dSTimo Teräs if (rt) 6902ffae99dSTimo Teräs fill_route_from_fnhe(rt, fnhe); 6912ffae99dSTimo Teräs rt = rcu_dereference(fnhe->fnhe_rth_output); 692387aa65aSTimo Teräs if (rt) 693387aa65aSTimo Teräs fill_route_from_fnhe(rt, fnhe); 694aee06da6SJulian Anastasov } else { 695aee06da6SJulian Anastasov if (depth > FNHE_RECLAIM_DEPTH) 696aee06da6SJulian Anastasov fnhe = fnhe_oldest(hash); 697aee06da6SJulian Anastasov else { 6984895c771SDavid S. Miller fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); 6994895c771SDavid S. Miller if (!fnhe) 700aee06da6SJulian Anastasov goto out_unlock; 7014895c771SDavid S. Miller 7024895c771SDavid S. Miller fnhe->fnhe_next = hash->chain; 7034895c771SDavid S. Miller rcu_assign_pointer(hash->chain, fnhe); 704aee06da6SJulian Anastasov } 705cebe84c6SXin Long fnhe->fnhe_genid = genid; 7064895c771SDavid S. Miller fnhe->fnhe_daddr = daddr; 707aee06da6SJulian Anastasov fnhe->fnhe_gw = gw; 708aee06da6SJulian Anastasov fnhe->fnhe_pmtu = pmtu; 709d52e5a7eSSabrina Dubroca fnhe->fnhe_mtu_locked = lock; 71094720e3aSJulian Anastasov fnhe->fnhe_expires = max(1UL, expires); 711387aa65aSTimo Teräs 712387aa65aSTimo Teräs /* Exception created; mark the cached routes for the nexthop 713387aa65aSTimo Teräs * stale, so anyone caching it rechecks if this exception 714387aa65aSTimo Teräs * applies to them. 715387aa65aSTimo Teräs */ 7160f457a36SDavid Ahern rt = rcu_dereference(nhc->nhc_rth_input); 7172ffae99dSTimo Teräs if (rt) 7182ffae99dSTimo Teräs rt->dst.obsolete = DST_OBSOLETE_KILL; 7192ffae99dSTimo Teräs 720387aa65aSTimo Teräs for_each_possible_cpu(i) { 721387aa65aSTimo Teräs struct rtable __rcu **prt; 7220f457a36SDavid Ahern prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); 723387aa65aSTimo Teräs rt = rcu_dereference(*prt); 724387aa65aSTimo Teräs if (rt) 725387aa65aSTimo Teräs rt->dst.obsolete = DST_OBSOLETE_KILL; 726387aa65aSTimo Teräs } 727aee06da6SJulian Anastasov } 728aee06da6SJulian Anastasov 7294895c771SDavid S. Miller fnhe->fnhe_stamp = jiffies; 730aee06da6SJulian Anastasov 731aee06da6SJulian Anastasov out_unlock: 732c5038a83SDavid S. Miller spin_unlock_bh(&fnhe_lock); 7334895c771SDavid S. Miller } 7344895c771SDavid S. Miller 735ceb33206SDavid S. Miller static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4, 736ceb33206SDavid S. Miller bool kill_route) 7371da177e4SLinus Torvalds { 738e47a185bSDavid S. Miller __be32 new_gw = icmp_hdr(skb)->un.gateway; 73994206125SDavid S. Miller __be32 old_gw = ip_hdr(skb)->saddr; 740e47a185bSDavid S. Miller struct net_device *dev = skb->dev; 741e47a185bSDavid S. Miller struct in_device *in_dev; 7424895c771SDavid S. Miller struct fib_result res; 743e47a185bSDavid S. Miller struct neighbour *n; 744317805b8SDenis V. Lunev struct net *net; 7451da177e4SLinus Torvalds 74694206125SDavid S. Miller switch (icmp_hdr(skb)->code & 7) { 74794206125SDavid S. Miller case ICMP_REDIR_NET: 74894206125SDavid S. Miller case ICMP_REDIR_NETTOS: 74994206125SDavid S. Miller case ICMP_REDIR_HOST: 75094206125SDavid S. Miller case ICMP_REDIR_HOSTTOS: 75194206125SDavid S. Miller break; 75294206125SDavid S. Miller 75394206125SDavid S. Miller default: 75494206125SDavid S. Miller return; 75594206125SDavid S. Miller } 75694206125SDavid S. Miller 7571550c171SDavid Ahern if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw) 758e47a185bSDavid S. Miller return; 759e47a185bSDavid S. Miller 760e47a185bSDavid S. Miller in_dev = __in_dev_get_rcu(dev); 761e47a185bSDavid S. Miller if (!in_dev) 762e47a185bSDavid S. Miller return; 763e47a185bSDavid S. Miller 764c346dca1SYOSHIFUJI Hideaki net = dev_net(dev); 7659d4fb27dSJoe Perches if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || 7669d4fb27dSJoe Perches ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || 7679d4fb27dSJoe Perches ipv4_is_zeronet(new_gw)) 7681da177e4SLinus Torvalds goto reject_redirect; 7691da177e4SLinus Torvalds 7701da177e4SLinus Torvalds if (!IN_DEV_SHARED_MEDIA(in_dev)) { 7711da177e4SLinus Torvalds if (!inet_addr_onlink(in_dev, new_gw, old_gw)) 7721da177e4SLinus Torvalds goto reject_redirect; 7731da177e4SLinus Torvalds if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) 7741da177e4SLinus Torvalds goto reject_redirect; 7751da177e4SLinus Torvalds } else { 776317805b8SDenis V. Lunev if (inet_addr_type(net, new_gw) != RTN_UNICAST) 7771da177e4SLinus Torvalds goto reject_redirect; 7781da177e4SLinus Torvalds } 7791da177e4SLinus Torvalds 780969447f2SStephen Suryaputra Lin n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); 781969447f2SStephen Suryaputra Lin if (!n) 782969447f2SStephen Suryaputra Lin n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); 7832c1a4311SWANG Cong if (!IS_ERR(n)) { 784e47a185bSDavid S. Miller if (!(n->nud_state & NUD_VALID)) { 785e47a185bSDavid S. Miller neigh_event_send(n, NULL); 786e47a185bSDavid S. Miller } else { 7870eeb075fSAndy Gospodarek if (fib_lookup(net, fl4, &res, 0) == 0) { 788eba618abSDavid Ahern struct fib_nh_common *nhc = FIB_RES_NHC(res); 7894895c771SDavid S. Miller 790a5995e71SDavid Ahern update_or_create_fnhe(nhc, fl4->daddr, new_gw, 791d52e5a7eSSabrina Dubroca 0, false, 792d52e5a7eSSabrina Dubroca jiffies + ip_rt_gc_timeout); 7934895c771SDavid S. Miller } 794ceb33206SDavid S. Miller if (kill_route) 795ceb33206SDavid S. Miller rt->dst.obsolete = DST_OBSOLETE_KILL; 796e47a185bSDavid S. Miller call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); 797e47a185bSDavid S. Miller } 798e47a185bSDavid S. Miller neigh_release(n); 799e47a185bSDavid S. Miller } 800e47a185bSDavid S. Miller return; 801e47a185bSDavid S. Miller 802e47a185bSDavid S. Miller reject_redirect: 803e47a185bSDavid S. Miller #ifdef CONFIG_IP_ROUTE_VERBOSE 80499ee038dSDavid S. Miller if (IN_DEV_LOG_MARTIANS(in_dev)) { 80599ee038dSDavid S. Miller const struct iphdr *iph = (const struct iphdr *) skb->data; 80699ee038dSDavid S. Miller __be32 daddr = iph->daddr; 80799ee038dSDavid S. Miller __be32 saddr = iph->saddr; 80899ee038dSDavid S. Miller 809e47a185bSDavid S. Miller net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" 810e47a185bSDavid S. Miller " Advised path = %pI4 -> %pI4\n", 811e47a185bSDavid S. Miller &old_gw, dev->name, &new_gw, 812e47a185bSDavid S. Miller &saddr, &daddr); 81399ee038dSDavid S. Miller } 814e47a185bSDavid S. Miller #endif 815e47a185bSDavid S. Miller ; 816e47a185bSDavid S. Miller } 817e47a185bSDavid S. Miller 8184895c771SDavid S. Miller static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 8194895c771SDavid S. Miller { 8204895c771SDavid S. Miller struct rtable *rt; 8214895c771SDavid S. Miller struct flowi4 fl4; 822f96ef988SMichal Kubecek const struct iphdr *iph = (const struct iphdr *) skb->data; 8237d995694SLorenzo Colitti struct net *net = dev_net(skb->dev); 824f96ef988SMichal Kubecek int oif = skb->dev->ifindex; 825f96ef988SMichal Kubecek u8 tos = RT_TOS(iph->tos); 826f96ef988SMichal Kubecek u8 prot = iph->protocol; 827f96ef988SMichal Kubecek u32 mark = skb->mark; 8284895c771SDavid S. Miller 8294895c771SDavid S. Miller rt = (struct rtable *) dst; 8304895c771SDavid S. Miller 8317d995694SLorenzo Colitti __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); 832ceb33206SDavid S. Miller __ip_do_redirect(rt, skb, &fl4, true); 8334895c771SDavid S. Miller } 8344895c771SDavid S. Miller 8351da177e4SLinus Torvalds static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 8361da177e4SLinus Torvalds { 8371da177e4SLinus Torvalds struct rtable *rt = (struct rtable *)dst; 8381da177e4SLinus Torvalds struct dst_entry *ret = dst; 8391da177e4SLinus Torvalds 8401da177e4SLinus Torvalds if (rt) { 841d11a4dc1STimo Teräs if (dst->obsolete > 0) { 8421da177e4SLinus Torvalds ip_rt_put(rt); 8431da177e4SLinus Torvalds ret = NULL; 8445943634fSDavid S. Miller } else if ((rt->rt_flags & RTCF_REDIRECTED) || 8455943634fSDavid S. Miller rt->dst.expires) { 84689aef892SDavid S. Miller ip_rt_put(rt); 8471da177e4SLinus Torvalds ret = NULL; 8481da177e4SLinus Torvalds } 8491da177e4SLinus Torvalds } 8501da177e4SLinus Torvalds return ret; 8511da177e4SLinus Torvalds } 8521da177e4SLinus Torvalds 8531da177e4SLinus Torvalds /* 8541da177e4SLinus Torvalds * Algorithm: 8551da177e4SLinus Torvalds * 1. The first ip_rt_redirect_number redirects are sent 8561da177e4SLinus Torvalds * with exponential backoff, then we stop sending them at all, 8571da177e4SLinus Torvalds * assuming that the host ignores our redirects. 8581da177e4SLinus Torvalds * 2. If we did not see packets requiring redirects 8591da177e4SLinus Torvalds * during ip_rt_redirect_silence, we assume that the host 8601da177e4SLinus Torvalds * forgot redirected route and start to send redirects again. 8611da177e4SLinus Torvalds * 8621da177e4SLinus Torvalds * This algorithm is much cheaper and more intelligent than dumb load limiting 8631da177e4SLinus Torvalds * in icmp.c. 8641da177e4SLinus Torvalds * 8651da177e4SLinus Torvalds * NOTE. Do not forget to inhibit load limiting for redirects (redundant) 8661da177e4SLinus Torvalds * and "frag. need" (breaks PMTU discovery) in icmp.c. 8671da177e4SLinus Torvalds */ 8681da177e4SLinus Torvalds 8691da177e4SLinus Torvalds void ip_rt_send_redirect(struct sk_buff *skb) 8701da177e4SLinus Torvalds { 871511c3f92SEric Dumazet struct rtable *rt = skb_rtable(skb); 87230038fc6SEric Dumazet struct in_device *in_dev; 87392d86829SDavid S. Miller struct inet_peer *peer; 8741d861aa4SDavid S. Miller struct net *net; 87530038fc6SEric Dumazet int log_martians; 876192132b9SDavid Ahern int vif; 8771da177e4SLinus Torvalds 87830038fc6SEric Dumazet rcu_read_lock(); 879d8d1f30bSChangli Gao in_dev = __in_dev_get_rcu(rt->dst.dev); 88030038fc6SEric Dumazet if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { 88130038fc6SEric Dumazet rcu_read_unlock(); 8821da177e4SLinus Torvalds return; 88330038fc6SEric Dumazet } 88430038fc6SEric Dumazet log_martians = IN_DEV_LOG_MARTIANS(in_dev); 885385add90SDavid Ahern vif = l3mdev_master_ifindex_rcu(rt->dst.dev); 88630038fc6SEric Dumazet rcu_read_unlock(); 8871da177e4SLinus Torvalds 8881d861aa4SDavid S. Miller net = dev_net(rt->dst.dev); 889192132b9SDavid Ahern peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1); 89092d86829SDavid S. Miller if (!peer) { 891e81da0e1SJulian Anastasov icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, 892e81da0e1SJulian Anastasov rt_nexthop(rt, ip_hdr(skb)->daddr)); 89392d86829SDavid S. Miller return; 89492d86829SDavid S. Miller } 89592d86829SDavid S. Miller 8961da177e4SLinus Torvalds /* No redirected packets during ip_rt_redirect_silence; 8971da177e4SLinus Torvalds * reset the algorithm. 8981da177e4SLinus Torvalds */ 899c09551c6SLorenzo Bianconi if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { 90092d86829SDavid S. Miller peer->rate_tokens = 0; 901c09551c6SLorenzo Bianconi peer->n_redirects = 0; 902c09551c6SLorenzo Bianconi } 9031da177e4SLinus Torvalds 9041da177e4SLinus Torvalds /* Too many ignored redirects; do not send anything 905d8d1f30bSChangli Gao * set dst.rate_last to the last seen redirected packet. 9061da177e4SLinus Torvalds */ 907c09551c6SLorenzo Bianconi if (peer->n_redirects >= ip_rt_redirect_number) { 90892d86829SDavid S. Miller peer->rate_last = jiffies; 9091d861aa4SDavid S. Miller goto out_put_peer; 9101da177e4SLinus Torvalds } 9111da177e4SLinus Torvalds 9121da177e4SLinus Torvalds /* Check for load limit; set rate_last to the latest sent 9131da177e4SLinus Torvalds * redirect. 9141da177e4SLinus Torvalds */ 91592d86829SDavid S. Miller if (peer->rate_tokens == 0 || 91614fb8a76SLi Yewang time_after(jiffies, 91792d86829SDavid S. Miller (peer->rate_last + 91892d86829SDavid S. Miller (ip_rt_redirect_load << peer->rate_tokens)))) { 919e81da0e1SJulian Anastasov __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); 920e81da0e1SJulian Anastasov 921e81da0e1SJulian Anastasov icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); 92292d86829SDavid S. Miller peer->rate_last = jiffies; 92392d86829SDavid S. Miller ++peer->rate_tokens; 924c09551c6SLorenzo Bianconi ++peer->n_redirects; 9251da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE 92630038fc6SEric Dumazet if (log_martians && 927e87cc472SJoe Perches peer->rate_tokens == ip_rt_redirect_number) 928e87cc472SJoe Perches net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", 92992101b3bSDavid S. Miller &ip_hdr(skb)->saddr, inet_iif(skb), 930e81da0e1SJulian Anastasov &ip_hdr(skb)->daddr, &gw); 9311da177e4SLinus Torvalds #endif 9321da177e4SLinus Torvalds } 9331d861aa4SDavid S. Miller out_put_peer: 9341d861aa4SDavid S. Miller inet_putpeer(peer); 9351da177e4SLinus Torvalds } 9361da177e4SLinus Torvalds 9371da177e4SLinus Torvalds static int ip_error(struct sk_buff *skb) 9381da177e4SLinus Torvalds { 939511c3f92SEric Dumazet struct rtable *rt = skb_rtable(skb); 940e2c0dc1fSStephen Suryaputra struct net_device *dev = skb->dev; 941e2c0dc1fSStephen Suryaputra struct in_device *in_dev; 94292d86829SDavid S. Miller struct inet_peer *peer; 9431da177e4SLinus Torvalds unsigned long now; 944251da413SDavid S. Miller struct net *net; 94592d86829SDavid S. Miller bool send; 9461da177e4SLinus Torvalds int code; 9471da177e4SLinus Torvalds 948e2c0dc1fSStephen Suryaputra if (netif_is_l3_master(skb->dev)) { 949e2c0dc1fSStephen Suryaputra dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); 950e2c0dc1fSStephen Suryaputra if (!dev) 951e2c0dc1fSStephen Suryaputra goto out; 952e2c0dc1fSStephen Suryaputra } 953e2c0dc1fSStephen Suryaputra 954e2c0dc1fSStephen Suryaputra in_dev = __in_dev_get_rcu(dev); 955e2c0dc1fSStephen Suryaputra 956381c759dSEric W. Biederman /* IP on this device is disabled. */ 957381c759dSEric W. Biederman if (!in_dev) 958381c759dSEric W. Biederman goto out; 959381c759dSEric W. Biederman 960251da413SDavid S. Miller net = dev_net(rt->dst.dev); 961251da413SDavid S. Miller if (!IN_DEV_FORWARD(in_dev)) { 962251da413SDavid S. Miller switch (rt->dst.error) { 963251da413SDavid S. Miller case EHOSTUNREACH: 964b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); 965251da413SDavid S. Miller break; 966251da413SDavid S. Miller 967251da413SDavid S. Miller case ENETUNREACH: 968b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); 969251da413SDavid S. Miller break; 970251da413SDavid S. Miller } 971251da413SDavid S. Miller goto out; 972251da413SDavid S. Miller } 973251da413SDavid S. Miller 974d8d1f30bSChangli Gao switch (rt->dst.error) { 9751da177e4SLinus Torvalds case EINVAL: 9761da177e4SLinus Torvalds default: 9771da177e4SLinus Torvalds goto out; 9781da177e4SLinus Torvalds case EHOSTUNREACH: 9791da177e4SLinus Torvalds code = ICMP_HOST_UNREACH; 9801da177e4SLinus Torvalds break; 9811da177e4SLinus Torvalds case ENETUNREACH: 9821da177e4SLinus Torvalds code = ICMP_NET_UNREACH; 983b45386efSEric Dumazet __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); 9841da177e4SLinus Torvalds break; 9851da177e4SLinus Torvalds case EACCES: 9861da177e4SLinus Torvalds code = ICMP_PKT_FILTERED; 9871da177e4SLinus Torvalds break; 9881da177e4SLinus Torvalds } 9891da177e4SLinus Torvalds 990192132b9SDavid Ahern peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 991385add90SDavid Ahern l3mdev_master_ifindex(skb->dev), 1); 99292d86829SDavid S. Miller 99392d86829SDavid S. Miller send = true; 99492d86829SDavid S. Miller if (peer) { 9951da177e4SLinus Torvalds now = jiffies; 99692d86829SDavid S. Miller peer->rate_tokens += now - peer->rate_last; 99792d86829SDavid S. Miller if (peer->rate_tokens > ip_rt_error_burst) 99892d86829SDavid S. Miller peer->rate_tokens = ip_rt_error_burst; 99992d86829SDavid S. Miller peer->rate_last = now; 100092d86829SDavid S. Miller if (peer->rate_tokens >= ip_rt_error_cost) 100192d86829SDavid S. Miller peer->rate_tokens -= ip_rt_error_cost; 100292d86829SDavid S. Miller else 100392d86829SDavid S. Miller send = false; 10041d861aa4SDavid S. Miller inet_putpeer(peer); 10051da177e4SLinus Torvalds } 100692d86829SDavid S. Miller if (send) 100792d86829SDavid S. Miller icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 10081da177e4SLinus Torvalds 10091da177e4SLinus Torvalds out: kfree_skb(skb); 10101da177e4SLinus Torvalds return 0; 10111da177e4SLinus Torvalds } 10121da177e4SLinus Torvalds 1013d851c12bSSteffen Klassert static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) 10141da177e4SLinus Torvalds { 1015d851c12bSSteffen Klassert struct dst_entry *dst = &rt->dst; 101628d35bcdSSabrina Dubroca u32 old_mtu = ipv4_mtu(dst); 10174895c771SDavid S. Miller struct fib_result res; 1018d52e5a7eSSabrina Dubroca bool lock = false; 10192c8cec5cSDavid S. Miller 1020d52e5a7eSSabrina Dubroca if (ip_mtu_locked(dst)) 1021fa1e492aSSteffen Klassert return; 1022fa1e492aSSteffen Klassert 102328d35bcdSSabrina Dubroca if (old_mtu < mtu) 10243cdaa5beSLi Wei return; 10253cdaa5beSLi Wei 1026d52e5a7eSSabrina Dubroca if (mtu < ip_rt_min_pmtu) { 1027d52e5a7eSSabrina Dubroca lock = true; 102828d35bcdSSabrina Dubroca mtu = min(old_mtu, ip_rt_min_pmtu); 1029d52e5a7eSSabrina Dubroca } 103046af3180SHiroaki SHIMODA 103128d35bcdSSabrina Dubroca if (rt->rt_pmtu == mtu && !lock && 1032f016229eSTimo Teräs time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) 1033f016229eSTimo Teräs return; 1034f016229eSTimo Teräs 1035c5ae7d41SEric Dumazet rcu_read_lock(); 10360eeb075fSAndy Gospodarek if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { 1037eba618abSDavid Ahern struct fib_nh_common *nhc = FIB_RES_NHC(res); 10384895c771SDavid S. Miller 1039a5995e71SDavid Ahern update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, 1040aee06da6SJulian Anastasov jiffies + ip_rt_mtu_expires); 10414895c771SDavid S. Miller } 1042c5ae7d41SEric Dumazet rcu_read_unlock(); 10431da177e4SLinus Torvalds } 10441da177e4SLinus Torvalds 10454895c771SDavid S. Miller static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 10464895c771SDavid S. Miller struct sk_buff *skb, u32 mtu) 10474895c771SDavid S. Miller { 10484895c771SDavid S. Miller struct rtable *rt = (struct rtable *) dst; 10494895c771SDavid S. Miller struct flowi4 fl4; 10504895c771SDavid S. Miller 10514895c771SDavid S. Miller ip_rt_build_flow_key(&fl4, sk, skb); 1052d851c12bSSteffen Klassert __ip_rt_update_pmtu(rt, &fl4, mtu); 10534895c771SDavid S. Miller } 10544895c771SDavid S. Miller 105536393395SDavid S. Miller void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, 1056d888f396SMaciej Żenczykowski int oif, u8 protocol) 105736393395SDavid S. Miller { 105836393395SDavid S. Miller const struct iphdr *iph = (const struct iphdr *) skb->data; 105936393395SDavid S. Miller struct flowi4 fl4; 106036393395SDavid S. Miller struct rtable *rt; 1061d888f396SMaciej Żenczykowski u32 mark = IP4_REPLY_MARK(net, skb->mark); 10621b3c61dcSLorenzo Colitti 1063e2d118a1SLorenzo Colitti __build_flow_key(net, &fl4, NULL, iph, oif, 1064d888f396SMaciej Żenczykowski RT_TOS(iph->tos), protocol, mark, 0); 106536393395SDavid S. Miller rt = __ip_route_output_key(net, &fl4); 106636393395SDavid S. Miller if (!IS_ERR(rt)) { 10674895c771SDavid S. Miller __ip_rt_update_pmtu(rt, &fl4, mtu); 106836393395SDavid S. Miller ip_rt_put(rt); 106936393395SDavid S. Miller } 107036393395SDavid S. Miller } 107136393395SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_update_pmtu); 107236393395SDavid S. Miller 10739cb3a50cSSteffen Klassert static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) 107436393395SDavid S. Miller { 10754895c771SDavid S. Miller const struct iphdr *iph = (const struct iphdr *) skb->data; 10764895c771SDavid S. Miller struct flowi4 fl4; 10774895c771SDavid S. Miller struct rtable *rt; 107836393395SDavid S. Miller 1079e2d118a1SLorenzo Colitti __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); 10801b3c61dcSLorenzo Colitti 10811b3c61dcSLorenzo Colitti if (!fl4.flowi4_mark) 10821b3c61dcSLorenzo Colitti fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); 10831b3c61dcSLorenzo Colitti 10844895c771SDavid S. Miller rt = __ip_route_output_key(sock_net(sk), &fl4); 10854895c771SDavid S. Miller if (!IS_ERR(rt)) { 10864895c771SDavid S. Miller __ip_rt_update_pmtu(rt, &fl4, mtu); 10874895c771SDavid S. Miller ip_rt_put(rt); 10884895c771SDavid S. Miller } 108936393395SDavid S. Miller } 10909cb3a50cSSteffen Klassert 10919cb3a50cSSteffen Klassert void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) 10929cb3a50cSSteffen Klassert { 10939cb3a50cSSteffen Klassert const struct iphdr *iph = (const struct iphdr *) skb->data; 10949cb3a50cSSteffen Klassert struct flowi4 fl4; 10959cb3a50cSSteffen Klassert struct rtable *rt; 10967f502361SEric Dumazet struct dst_entry *odst = NULL; 1097b44108dbSSteffen Klassert bool new = false; 1098e2d118a1SLorenzo Colitti struct net *net = sock_net(sk); 10999cb3a50cSSteffen Klassert 11009cb3a50cSSteffen Klassert bh_lock_sock(sk); 1101482fc609SHannes Frederic Sowa 1102482fc609SHannes Frederic Sowa if (!ip_sk_accept_pmtu(sk)) 1103482fc609SHannes Frederic Sowa goto out; 1104482fc609SHannes Frederic Sowa 11057f502361SEric Dumazet odst = sk_dst_get(sk); 11069cb3a50cSSteffen Klassert 11077f502361SEric Dumazet if (sock_owned_by_user(sk) || !odst) { 11089cb3a50cSSteffen Klassert __ipv4_sk_update_pmtu(skb, sk, mtu); 11099cb3a50cSSteffen Klassert goto out; 11109cb3a50cSSteffen Klassert } 11119cb3a50cSSteffen Klassert 1112e2d118a1SLorenzo Colitti __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); 11139cb3a50cSSteffen Klassert 11147f502361SEric Dumazet rt = (struct rtable *)odst; 111551456b29SIan Morris if (odst->obsolete && !odst->ops->check(odst, 0)) { 11169cb3a50cSSteffen Klassert rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 11179cb3a50cSSteffen Klassert if (IS_ERR(rt)) 11189cb3a50cSSteffen Klassert goto out; 1119b44108dbSSteffen Klassert 1120b44108dbSSteffen Klassert new = true; 11219cb3a50cSSteffen Klassert } 11229cb3a50cSSteffen Klassert 11230f6c480fSDavid Miller __ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu); 11249cb3a50cSSteffen Klassert 11257f502361SEric Dumazet if (!dst_check(&rt->dst, 0)) { 1126b44108dbSSteffen Klassert if (new) 1127b44108dbSSteffen Klassert dst_release(&rt->dst); 1128b44108dbSSteffen Klassert 11299cb3a50cSSteffen Klassert rt = ip_route_output_flow(sock_net(sk), &fl4, sk); 11309cb3a50cSSteffen Klassert if (IS_ERR(rt)) 11319cb3a50cSSteffen Klassert goto out; 11329cb3a50cSSteffen Klassert 1133b44108dbSSteffen Klassert new = true; 11349cb3a50cSSteffen Klassert } 11359cb3a50cSSteffen Klassert 1136b44108dbSSteffen Klassert if (new) 11377f502361SEric Dumazet sk_dst_set(sk, &rt->dst); 11389cb3a50cSSteffen Klassert 11399cb3a50cSSteffen Klassert out: 11409cb3a50cSSteffen Klassert bh_unlock_sock(sk); 11417f502361SEric Dumazet dst_release(odst); 11429cb3a50cSSteffen Klassert } 114336393395SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); 1144f39925dbSDavid S. Miller 1145b42597e2SDavid S. Miller void ipv4_redirect(struct sk_buff *skb, struct net *net, 11461042caa7SMaciej Żenczykowski int oif, u8 protocol) 1147b42597e2SDavid S. Miller { 1148b42597e2SDavid S. Miller const struct iphdr *iph = (const struct iphdr *) skb->data; 1149b42597e2SDavid S. Miller struct flowi4 fl4; 1150b42597e2SDavid S. Miller struct rtable *rt; 1151b42597e2SDavid S. Miller 1152e2d118a1SLorenzo Colitti __build_flow_key(net, &fl4, NULL, iph, oif, 11531042caa7SMaciej Żenczykowski RT_TOS(iph->tos), protocol, 0, 0); 1154b42597e2SDavid S. Miller rt = __ip_route_output_key(net, &fl4); 1155b42597e2SDavid S. Miller if (!IS_ERR(rt)) { 1156ceb33206SDavid S. Miller __ip_do_redirect(rt, skb, &fl4, false); 1157b42597e2SDavid S. Miller ip_rt_put(rt); 1158b42597e2SDavid S. Miller } 1159b42597e2SDavid S. Miller } 1160b42597e2SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_redirect); 1161b42597e2SDavid S. Miller 1162b42597e2SDavid S. Miller void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) 1163b42597e2SDavid S. Miller { 11644895c771SDavid S. Miller const struct iphdr *iph = (const struct iphdr *) skb->data; 11654895c771SDavid S. Miller struct flowi4 fl4; 11664895c771SDavid S. Miller struct rtable *rt; 1167e2d118a1SLorenzo Colitti struct net *net = sock_net(sk); 1168b42597e2SDavid S. Miller 1169e2d118a1SLorenzo Colitti __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); 1170e2d118a1SLorenzo Colitti rt = __ip_route_output_key(net, &fl4); 11714895c771SDavid S. Miller if (!IS_ERR(rt)) { 1172ceb33206SDavid S. Miller __ip_do_redirect(rt, skb, &fl4, false); 11734895c771SDavid S. Miller ip_rt_put(rt); 11744895c771SDavid S. Miller } 1175b42597e2SDavid S. Miller } 1176b42597e2SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_sk_redirect); 1177b42597e2SDavid S. Miller 1178efbc368dSDavid S. Miller static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1179efbc368dSDavid S. Miller { 1180efbc368dSDavid S. Miller struct rtable *rt = (struct rtable *) dst; 1181efbc368dSDavid S. Miller 1182ceb33206SDavid S. Miller /* All IPV4 dsts are created with ->obsolete set to the value 1183ceb33206SDavid S. Miller * DST_OBSOLETE_FORCE_CHK which forces validation calls down 1184ceb33206SDavid S. Miller * into this function always. 1185ceb33206SDavid S. Miller * 1186387aa65aSTimo Teräs * When a PMTU/redirect information update invalidates a route, 1187387aa65aSTimo Teräs * this is indicated by setting obsolete to DST_OBSOLETE_KILL or 118802afc7adSJulian Wiedmann * DST_OBSOLETE_DEAD. 1189ceb33206SDavid S. Miller */ 1190387aa65aSTimo Teräs if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) 1191efbc368dSDavid S. Miller return NULL; 1192d11a4dc1STimo Teräs return dst; 11931da177e4SLinus Torvalds } 11941da177e4SLinus Torvalds 119520ff83f1SEric Dumazet static void ipv4_send_dest_unreach(struct sk_buff *skb) 11961da177e4SLinus Torvalds { 1197ed0de45aSStephen Suryaputra struct ip_options opt; 1198c543cb4aSEric Dumazet int res; 11991da177e4SLinus Torvalds 1200ed0de45aSStephen Suryaputra /* Recompile ip options since IPCB may not be valid anymore. 120120ff83f1SEric Dumazet * Also check we have a reasonable ipv4 header. 1202ed0de45aSStephen Suryaputra */ 120320ff83f1SEric Dumazet if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) || 120420ff83f1SEric Dumazet ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) 120520ff83f1SEric Dumazet return; 120620ff83f1SEric Dumazet 1207ed0de45aSStephen Suryaputra memset(&opt, 0, sizeof(opt)); 120820ff83f1SEric Dumazet if (ip_hdr(skb)->ihl > 5) { 120920ff83f1SEric Dumazet if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) 121020ff83f1SEric Dumazet return; 1211ed0de45aSStephen Suryaputra opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); 1212c543cb4aSEric Dumazet 1213c543cb4aSEric Dumazet rcu_read_lock(); 1214c543cb4aSEric Dumazet res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); 1215c543cb4aSEric Dumazet rcu_read_unlock(); 1216c543cb4aSEric Dumazet 1217c543cb4aSEric Dumazet if (res) 1218ed0de45aSStephen Suryaputra return; 121920ff83f1SEric Dumazet } 1220ed0de45aSStephen Suryaputra __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); 122120ff83f1SEric Dumazet } 122220ff83f1SEric Dumazet 122320ff83f1SEric Dumazet static void ipv4_link_failure(struct sk_buff *skb) 122420ff83f1SEric Dumazet { 122520ff83f1SEric Dumazet struct rtable *rt; 122620ff83f1SEric Dumazet 122720ff83f1SEric Dumazet ipv4_send_dest_unreach(skb); 12281da177e4SLinus Torvalds 1229511c3f92SEric Dumazet rt = skb_rtable(skb); 12305943634fSDavid S. Miller if (rt) 12315943634fSDavid S. Miller dst_set_expires(&rt->dst, 0); 12322c8cec5cSDavid S. Miller } 12331da177e4SLinus Torvalds 1234ede2059dSEric W. Biederman static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb) 12351da177e4SLinus Torvalds { 123691df42beSJoe Perches pr_debug("%s: %pI4 -> %pI4, %s\n", 123791df42beSJoe Perches __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, 12381da177e4SLinus Torvalds skb->dev ? skb->dev->name : "?"); 12391da177e4SLinus Torvalds kfree_skb(skb); 1240c378a9c0SDave Jones WARN_ON(1); 12411da177e4SLinus Torvalds return 0; 12421da177e4SLinus Torvalds } 12431da177e4SLinus Torvalds 12441da177e4SLinus Torvalds /* 12451da177e4SLinus Torvalds We do not cache source address of outgoing interface, 12461da177e4SLinus Torvalds because it is used only by IP RR, TS and SRR options, 12471da177e4SLinus Torvalds so that it out of fast path. 12481da177e4SLinus Torvalds 12491da177e4SLinus Torvalds BTW remember: "addr" is allowed to be not aligned 12501da177e4SLinus Torvalds in IP options! 12511da177e4SLinus Torvalds */ 12521da177e4SLinus Torvalds 12538e36360aSDavid S. Miller void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) 12541da177e4SLinus Torvalds { 1255a61ced5dSAl Viro __be32 src; 12561da177e4SLinus Torvalds 1257c7537967SDavid S. Miller if (rt_is_output_route(rt)) 1258c5be24ffSDavid S. Miller src = ip_hdr(skb)->saddr; 1259ebc0ffaeSEric Dumazet else { 12608e36360aSDavid S. Miller struct fib_result res; 1261e351bb62SMaciej Żenczykowski struct iphdr *iph = ip_hdr(skb); 1262e351bb62SMaciej Żenczykowski struct flowi4 fl4 = { 1263e351bb62SMaciej Żenczykowski .daddr = iph->daddr, 1264e351bb62SMaciej Żenczykowski .saddr = iph->saddr, 1265e351bb62SMaciej Żenczykowski .flowi4_tos = RT_TOS(iph->tos), 1266e351bb62SMaciej Żenczykowski .flowi4_oif = rt->dst.dev->ifindex, 1267e351bb62SMaciej Żenczykowski .flowi4_iif = skb->dev->ifindex, 1268e351bb62SMaciej Żenczykowski .flowi4_mark = skb->mark, 1269e351bb62SMaciej Żenczykowski }; 12705e2b61f7SDavid S. Miller 1271ebc0ffaeSEric Dumazet rcu_read_lock(); 12720eeb075fSAndy Gospodarek if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) 1273eba618abSDavid Ahern src = fib_result_prefsrc(dev_net(rt->dst.dev), &res); 1274ebc0ffaeSEric Dumazet else 1275f8126f1dSDavid S. Miller src = inet_select_addr(rt->dst.dev, 1276f8126f1dSDavid S. Miller rt_nexthop(rt, iph->daddr), 12771da177e4SLinus Torvalds RT_SCOPE_UNIVERSE); 1278ebc0ffaeSEric Dumazet rcu_read_unlock(); 1279ebc0ffaeSEric Dumazet } 12801da177e4SLinus Torvalds memcpy(addr, &src, 4); 12811da177e4SLinus Torvalds } 12821da177e4SLinus Torvalds 1283c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 12841da177e4SLinus Torvalds static void set_class_tag(struct rtable *rt, u32 tag) 12851da177e4SLinus Torvalds { 1286d8d1f30bSChangli Gao if (!(rt->dst.tclassid & 0xFFFF)) 1287d8d1f30bSChangli Gao rt->dst.tclassid |= tag & 0xFFFF; 1288d8d1f30bSChangli Gao if (!(rt->dst.tclassid & 0xFFFF0000)) 1289d8d1f30bSChangli Gao rt->dst.tclassid |= tag & 0xFFFF0000; 12901da177e4SLinus Torvalds } 12911da177e4SLinus Torvalds #endif 12921da177e4SLinus Torvalds 12930dbaee3bSDavid S. Miller static unsigned int ipv4_default_advmss(const struct dst_entry *dst) 12940dbaee3bSDavid S. Miller { 12957ed14d97SGao Feng unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); 1296164a5e7aSEric Dumazet unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, 12970dbaee3bSDavid S. Miller ip_rt_min_advmss); 12987ed14d97SGao Feng 12997ed14d97SGao Feng return min(advmss, IPV4_MAX_PMTU - header_size); 13000dbaee3bSDavid S. Miller } 13010dbaee3bSDavid S. Miller 1302ebb762f2SSteffen Klassert static unsigned int ipv4_mtu(const struct dst_entry *dst) 1303d33e4553SDavid S. Miller { 1304261663b0SSteffen Klassert const struct rtable *rt = (const struct rtable *) dst; 13055943634fSDavid S. Miller unsigned int mtu = rt->rt_pmtu; 13065943634fSDavid S. Miller 130798d75c37SAlexander Duyck if (!mtu || time_after_eq(jiffies, rt->dst.expires)) 13085943634fSDavid S. Miller mtu = dst_metric_raw(dst, RTAX_MTU); 1309618f9bc7SSteffen Klassert 131038d523e2SSteffen Klassert if (mtu) 1311618f9bc7SSteffen Klassert return mtu; 1312618f9bc7SSteffen Klassert 1313c780a049SEric Dumazet mtu = READ_ONCE(dst->dev->mtu); 1314d33e4553SDavid S. Miller 1315d52e5a7eSSabrina Dubroca if (unlikely(ip_mtu_locked(dst))) { 13161550c171SDavid Ahern if (rt->rt_gw_family && mtu > 576) 1317d33e4553SDavid S. Miller mtu = 576; 1318d33e4553SDavid S. Miller } 1319d33e4553SDavid S. Miller 132014972cbdSRoopa Prabhu mtu = min_t(unsigned int, mtu, IP_MAX_MTU); 132114972cbdSRoopa Prabhu 132214972cbdSRoopa Prabhu return mtu - lwtunnel_headroom(dst->lwtstate, mtu); 1323d33e4553SDavid S. Miller } 1324d33e4553SDavid S. Miller 1325a5995e71SDavid Ahern static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) 132694720e3aSJulian Anastasov { 132794720e3aSJulian Anastasov struct fnhe_hash_bucket *hash; 132894720e3aSJulian Anastasov struct fib_nh_exception *fnhe, __rcu **fnhe_p; 132994720e3aSJulian Anastasov u32 hval = fnhe_hashfun(daddr); 133094720e3aSJulian Anastasov 133194720e3aSJulian Anastasov spin_lock_bh(&fnhe_lock); 133294720e3aSJulian Anastasov 1333a5995e71SDavid Ahern hash = rcu_dereference_protected(nhc->nhc_exceptions, 133494720e3aSJulian Anastasov lockdep_is_held(&fnhe_lock)); 133594720e3aSJulian Anastasov hash += hval; 133694720e3aSJulian Anastasov 133794720e3aSJulian Anastasov fnhe_p = &hash->chain; 133894720e3aSJulian Anastasov fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); 133994720e3aSJulian Anastasov while (fnhe) { 134094720e3aSJulian Anastasov if (fnhe->fnhe_daddr == daddr) { 134194720e3aSJulian Anastasov rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( 134294720e3aSJulian Anastasov fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); 1343ee60ad21SXin Long /* set fnhe_daddr to 0 to ensure it won't bind with 1344ee60ad21SXin Long * new dsts in rt_bind_exception(). 1345ee60ad21SXin Long */ 1346ee60ad21SXin Long fnhe->fnhe_daddr = 0; 134794720e3aSJulian Anastasov fnhe_flush_routes(fnhe); 134894720e3aSJulian Anastasov kfree_rcu(fnhe, rcu); 134994720e3aSJulian Anastasov break; 135094720e3aSJulian Anastasov } 135194720e3aSJulian Anastasov fnhe_p = &fnhe->fnhe_next; 135294720e3aSJulian Anastasov fnhe = rcu_dereference_protected(fnhe->fnhe_next, 135394720e3aSJulian Anastasov lockdep_is_held(&fnhe_lock)); 135494720e3aSJulian Anastasov } 135594720e3aSJulian Anastasov 135694720e3aSJulian Anastasov spin_unlock_bh(&fnhe_lock); 135794720e3aSJulian Anastasov } 135894720e3aSJulian Anastasov 1359a5995e71SDavid Ahern static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, 1360a5995e71SDavid Ahern __be32 daddr) 13614895c771SDavid S. Miller { 1362a5995e71SDavid Ahern struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); 13634895c771SDavid S. Miller struct fib_nh_exception *fnhe; 13644895c771SDavid S. Miller u32 hval; 13654895c771SDavid S. Miller 1366f2bb4bedSDavid S. Miller if (!hash) 1367f2bb4bedSDavid S. Miller return NULL; 1368f2bb4bedSDavid S. Miller 1369d3a25c98SDavid S. Miller hval = fnhe_hashfun(daddr); 13704895c771SDavid S. Miller 13714895c771SDavid S. Miller for (fnhe = rcu_dereference(hash[hval].chain); fnhe; 13724895c771SDavid S. Miller fnhe = rcu_dereference(fnhe->fnhe_next)) { 137394720e3aSJulian Anastasov if (fnhe->fnhe_daddr == daddr) { 137494720e3aSJulian Anastasov if (fnhe->fnhe_expires && 137594720e3aSJulian Anastasov time_after(jiffies, fnhe->fnhe_expires)) { 1376a5995e71SDavid Ahern ip_del_fnhe(nhc, daddr); 137794720e3aSJulian Anastasov break; 137894720e3aSJulian Anastasov } 1379f2bb4bedSDavid S. Miller return fnhe; 1380f2bb4bedSDavid S. Miller } 138194720e3aSJulian Anastasov } 1382f2bb4bedSDavid S. Miller return NULL; 1383f2bb4bedSDavid S. Miller } 1384f2bb4bedSDavid S. Miller 138550d889b1SDavid Ahern /* MTU selection: 138650d889b1SDavid Ahern * 1. mtu on route is locked - use it 138750d889b1SDavid Ahern * 2. mtu from nexthop exception 138850d889b1SDavid Ahern * 3. mtu from egress device 138950d889b1SDavid Ahern */ 139050d889b1SDavid Ahern 139150d889b1SDavid Ahern u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) 139250d889b1SDavid Ahern { 1393eba618abSDavid Ahern struct fib_nh_common *nhc = res->nhc; 1394eba618abSDavid Ahern struct net_device *dev = nhc->nhc_dev; 139550d889b1SDavid Ahern struct fib_info *fi = res->fi; 139650d889b1SDavid Ahern u32 mtu = 0; 139750d889b1SDavid Ahern 139850d889b1SDavid Ahern if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || 139950d889b1SDavid Ahern fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) 140050d889b1SDavid Ahern mtu = fi->fib_mtu; 140150d889b1SDavid Ahern 140250d889b1SDavid Ahern if (likely(!mtu)) { 140350d889b1SDavid Ahern struct fib_nh_exception *fnhe; 140450d889b1SDavid Ahern 1405a5995e71SDavid Ahern fnhe = find_exception(nhc, daddr); 140650d889b1SDavid Ahern if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) 140750d889b1SDavid Ahern mtu = fnhe->fnhe_pmtu; 140850d889b1SDavid Ahern } 140950d889b1SDavid Ahern 141050d889b1SDavid Ahern if (likely(!mtu)) 141150d889b1SDavid Ahern mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); 141250d889b1SDavid Ahern 1413eba618abSDavid Ahern return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu); 141450d889b1SDavid Ahern } 141550d889b1SDavid Ahern 1416caacf05eSDavid S. Miller static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, 1417a4c2fd7fSWei Wang __be32 daddr, const bool do_cache) 1418f2bb4bedSDavid S. Miller { 1419caacf05eSDavid S. Miller bool ret = false; 1420caacf05eSDavid S. Miller 1421c5038a83SDavid S. Miller spin_lock_bh(&fnhe_lock); 1422aee06da6SJulian Anastasov 1423c5038a83SDavid S. Miller if (daddr == fnhe->fnhe_daddr) { 14242ffae99dSTimo Teräs struct rtable __rcu **porig; 14252ffae99dSTimo Teräs struct rtable *orig; 14265aad1de5STimo Teräs int genid = fnhe_genid(dev_net(rt->dst.dev)); 14272ffae99dSTimo Teräs 14282ffae99dSTimo Teräs if (rt_is_input_route(rt)) 14292ffae99dSTimo Teräs porig = &fnhe->fnhe_rth_input; 14302ffae99dSTimo Teräs else 14312ffae99dSTimo Teräs porig = &fnhe->fnhe_rth_output; 14322ffae99dSTimo Teräs orig = rcu_dereference(*porig); 14335aad1de5STimo Teräs 14345aad1de5STimo Teräs if (fnhe->fnhe_genid != genid) { 14355aad1de5STimo Teräs fnhe->fnhe_genid = genid; 143613d82bf5SSteffen Klassert fnhe->fnhe_gw = 0; 143713d82bf5SSteffen Klassert fnhe->fnhe_pmtu = 0; 143813d82bf5SSteffen Klassert fnhe->fnhe_expires = 0; 14390e8411e4SHangbin Liu fnhe->fnhe_mtu_locked = false; 14402ffae99dSTimo Teräs fnhe_flush_routes(fnhe); 14412ffae99dSTimo Teräs orig = NULL; 144213d82bf5SSteffen Klassert } 1443387aa65aSTimo Teräs fill_route_from_fnhe(rt, fnhe); 14441550c171SDavid Ahern if (!rt->rt_gw4) { 14451550c171SDavid Ahern rt->rt_gw4 = daddr; 14461550c171SDavid Ahern rt->rt_gw_family = AF_INET; 14471550c171SDavid Ahern } 1448f2bb4bedSDavid S. Miller 1449a4c2fd7fSWei Wang if (do_cache) { 14500830106cSWei Wang dst_hold(&rt->dst); 14512ffae99dSTimo Teräs rcu_assign_pointer(*porig, rt); 14520830106cSWei Wang if (orig) { 145395c47f9cSWei Wang dst_dev_put(&orig->dst); 14540830106cSWei Wang dst_release(&orig->dst); 14550830106cSWei Wang } 14562ffae99dSTimo Teräs ret = true; 14572ffae99dSTimo Teräs } 1458c5038a83SDavid S. Miller 1459c5038a83SDavid S. Miller fnhe->fnhe_stamp = jiffies; 1460c5038a83SDavid S. Miller } 1461c5038a83SDavid S. Miller spin_unlock_bh(&fnhe_lock); 1462caacf05eSDavid S. Miller 1463caacf05eSDavid S. Miller return ret; 146454764bb6SEric Dumazet } 146554764bb6SEric Dumazet 146687063a1fSDavid Ahern static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) 1467f2bb4bedSDavid S. Miller { 1468d26b3a7cSEric Dumazet struct rtable *orig, *prev, **p; 1469caacf05eSDavid S. Miller bool ret = true; 1470f2bb4bedSDavid S. Miller 1471d26b3a7cSEric Dumazet if (rt_is_input_route(rt)) { 14720f457a36SDavid Ahern p = (struct rtable **)&nhc->nhc_rth_input; 1473d26b3a7cSEric Dumazet } else { 14740f457a36SDavid Ahern p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); 1475d26b3a7cSEric Dumazet } 1476f2bb4bedSDavid S. Miller orig = *p; 1477f2bb4bedSDavid S. Miller 14780830106cSWei Wang /* hold dst before doing cmpxchg() to avoid race condition 14790830106cSWei Wang * on this dst 14800830106cSWei Wang */ 14810830106cSWei Wang dst_hold(&rt->dst); 1482f2bb4bedSDavid S. Miller prev = cmpxchg(p, orig, rt); 1483f2bb4bedSDavid S. Miller if (prev == orig) { 14840830106cSWei Wang if (orig) { 148595c47f9cSWei Wang dst_dev_put(&orig->dst); 14860830106cSWei Wang dst_release(&orig->dst); 14870830106cSWei Wang } 14880830106cSWei Wang } else { 14890830106cSWei Wang dst_release(&rt->dst); 1490caacf05eSDavid S. Miller ret = false; 14910830106cSWei Wang } 1492caacf05eSDavid S. Miller 1493caacf05eSDavid S. Miller return ret; 1494caacf05eSDavid S. Miller } 1495caacf05eSDavid S. Miller 14965055c371SEric Dumazet struct uncached_list { 14975055c371SEric Dumazet spinlock_t lock; 14985055c371SEric Dumazet struct list_head head; 14995055c371SEric Dumazet }; 15005055c371SEric Dumazet 15015055c371SEric Dumazet static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); 1502caacf05eSDavid S. Miller 1503510c321bSXin Long void rt_add_uncached_list(struct rtable *rt) 1504caacf05eSDavid S. Miller { 15055055c371SEric Dumazet struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); 15065055c371SEric Dumazet 15075055c371SEric Dumazet rt->rt_uncached_list = ul; 15085055c371SEric Dumazet 15095055c371SEric Dumazet spin_lock_bh(&ul->lock); 15105055c371SEric Dumazet list_add_tail(&rt->rt_uncached, &ul->head); 15115055c371SEric Dumazet spin_unlock_bh(&ul->lock); 1512caacf05eSDavid S. Miller } 1513caacf05eSDavid S. Miller 1514510c321bSXin Long void rt_del_uncached_list(struct rtable *rt) 1515510c321bSXin Long { 1516510c321bSXin Long if (!list_empty(&rt->rt_uncached)) { 1517510c321bSXin Long struct uncached_list *ul = rt->rt_uncached_list; 1518510c321bSXin Long 1519510c321bSXin Long spin_lock_bh(&ul->lock); 1520510c321bSXin Long list_del(&rt->rt_uncached); 1521510c321bSXin Long spin_unlock_bh(&ul->lock); 1522510c321bSXin Long } 1523510c321bSXin Long } 1524510c321bSXin Long 1525caacf05eSDavid S. Miller static void ipv4_dst_destroy(struct dst_entry *dst) 1526caacf05eSDavid S. Miller { 1527caacf05eSDavid S. Miller struct rtable *rt = (struct rtable *)dst; 1528caacf05eSDavid S. Miller 15291620a336SDavid Ahern ip_dst_metrics_put(dst); 1530510c321bSXin Long rt_del_uncached_list(rt); 1531caacf05eSDavid S. Miller } 1532caacf05eSDavid S. Miller 1533caacf05eSDavid S. Miller void rt_flush_dev(struct net_device *dev) 1534caacf05eSDavid S. Miller { 1535caacf05eSDavid S. Miller struct net *net = dev_net(dev); 1536caacf05eSDavid S. Miller struct rtable *rt; 15375055c371SEric Dumazet int cpu; 1538caacf05eSDavid S. Miller 15395055c371SEric Dumazet for_each_possible_cpu(cpu) { 15405055c371SEric Dumazet struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); 15415055c371SEric Dumazet 15425055c371SEric Dumazet spin_lock_bh(&ul->lock); 15435055c371SEric Dumazet list_for_each_entry(rt, &ul->head, rt_uncached) { 1544caacf05eSDavid S. Miller if (rt->dst.dev != dev) 1545caacf05eSDavid S. Miller continue; 1546caacf05eSDavid S. Miller rt->dst.dev = net->loopback_dev; 1547caacf05eSDavid S. Miller dev_hold(rt->dst.dev); 1548caacf05eSDavid S. Miller dev_put(dev); 1549caacf05eSDavid S. Miller } 15505055c371SEric Dumazet spin_unlock_bh(&ul->lock); 15514895c771SDavid S. Miller } 15524895c771SDavid S. Miller } 15534895c771SDavid S. Miller 15544331debcSEric Dumazet static bool rt_cache_valid(const struct rtable *rt) 1555d2d68ba9SDavid S. Miller { 15564331debcSEric Dumazet return rt && 15574331debcSEric Dumazet rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 15584331debcSEric Dumazet !rt_is_expired(rt); 1559d2d68ba9SDavid S. Miller } 1560d2d68ba9SDavid S. Miller 1561f2bb4bedSDavid S. Miller static void rt_set_nexthop(struct rtable *rt, __be32 daddr, 15625e2b61f7SDavid S. Miller const struct fib_result *res, 1563f2bb4bedSDavid S. Miller struct fib_nh_exception *fnhe, 1564a4c2fd7fSWei Wang struct fib_info *fi, u16 type, u32 itag, 1565a4c2fd7fSWei Wang const bool do_cache) 15661da177e4SLinus Torvalds { 1567caacf05eSDavid S. Miller bool cached = false; 1568caacf05eSDavid S. Miller 15691da177e4SLinus Torvalds if (fi) { 1570eba618abSDavid Ahern struct fib_nh_common *nhc = FIB_RES_NHC(*res); 15714895c771SDavid S. Miller 15720f5f7d7bSDavid Ahern if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { 15730f5f7d7bSDavid Ahern rt->rt_gw_family = nhc->nhc_gw_family; 15740f5f7d7bSDavid Ahern /* only INET and INET6 are supported */ 15750f5f7d7bSDavid Ahern if (likely(nhc->nhc_gw_family == AF_INET)) 15760f5f7d7bSDavid Ahern rt->rt_gw4 = nhc->nhc_gw.ipv4; 15770f5f7d7bSDavid Ahern else 15780f5f7d7bSDavid Ahern rt->rt_gw6 = nhc->nhc_gw.ipv6; 1579155e8336SJulian Anastasov } 15800f5f7d7bSDavid Ahern 1581e1255ed4SDavid Ahern ip_dst_init_metrics(&rt->dst, fi->fib_metrics); 1582e1255ed4SDavid Ahern 1583c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1584dcb1ecb5SDavid Ahern if (nhc->nhc_family == AF_INET) { 158587063a1fSDavid Ahern struct fib_nh *nh; 158687063a1fSDavid Ahern 158787063a1fSDavid Ahern nh = container_of(nhc, struct fib_nh, nh_common); 1588f2bb4bedSDavid S. Miller rt->dst.tclassid = nh->nh_tclassid; 158987063a1fSDavid Ahern } 15901da177e4SLinus Torvalds #endif 159187063a1fSDavid Ahern rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); 1592c5038a83SDavid S. Miller if (unlikely(fnhe)) 1593a4c2fd7fSWei Wang cached = rt_bind_exception(rt, fnhe, daddr, do_cache); 1594a4c2fd7fSWei Wang else if (do_cache) 159587063a1fSDavid Ahern cached = rt_cache_route(nhc, rt); 1596155e8336SJulian Anastasov if (unlikely(!cached)) { 1597155e8336SJulian Anastasov /* Routes we intend to cache in nexthop exception or 1598155e8336SJulian Anastasov * FIB nexthop have the DST_NOCACHE bit clear. 1599155e8336SJulian Anastasov * However, if we are unsuccessful at storing this 1600155e8336SJulian Anastasov * route into the cache we really need to set it. 1601155e8336SJulian Anastasov */ 16021550c171SDavid Ahern if (!rt->rt_gw4) { 16031550c171SDavid Ahern rt->rt_gw_family = AF_INET; 16041550c171SDavid Ahern rt->rt_gw4 = daddr; 16051550c171SDavid Ahern } 1606155e8336SJulian Anastasov rt_add_uncached_list(rt); 1607d33e4553SDavid S. Miller } 1608155e8336SJulian Anastasov } else 1609caacf05eSDavid S. Miller rt_add_uncached_list(rt); 16101da177e4SLinus Torvalds 1611c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 16121da177e4SLinus Torvalds #ifdef CONFIG_IP_MULTIPLE_TABLES 161385b91b03SDavid S. Miller set_class_tag(rt, res->tclassid); 16141da177e4SLinus Torvalds #endif 16151da177e4SLinus Torvalds set_class_tag(rt, itag); 16161da177e4SLinus Torvalds #endif 16171da177e4SLinus Torvalds } 16181da177e4SLinus Torvalds 16199ab179d8SDavid Ahern struct rtable *rt_dst_alloc(struct net_device *dev, 1620d08c4f35SDavid Ahern unsigned int flags, u16 type, 1621f2bb4bedSDavid S. Miller bool nopolicy, bool noxfrm, bool will_cache) 16220c4dcd58SDavid S. Miller { 1623d08c4f35SDavid Ahern struct rtable *rt; 1624d08c4f35SDavid Ahern 1625d08c4f35SDavid Ahern rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, 1626a4c2fd7fSWei Wang (will_cache ? 0 : DST_HOST) | 16270c4dcd58SDavid S. Miller (nopolicy ? DST_NOPOLICY : 0) | 1628b2a9c0edSWei Wang (noxfrm ? DST_NOXFRM : 0)); 1629d08c4f35SDavid Ahern 1630d08c4f35SDavid Ahern if (rt) { 1631d08c4f35SDavid Ahern rt->rt_genid = rt_genid_ipv4(dev_net(dev)); 1632d08c4f35SDavid Ahern rt->rt_flags = flags; 1633d08c4f35SDavid Ahern rt->rt_type = type; 1634d08c4f35SDavid Ahern rt->rt_is_input = 0; 1635d08c4f35SDavid Ahern rt->rt_iif = 0; 1636d08c4f35SDavid Ahern rt->rt_pmtu = 0; 1637d52e5a7eSSabrina Dubroca rt->rt_mtu_locked = 0; 16381550c171SDavid Ahern rt->rt_gw_family = 0; 16391550c171SDavid Ahern rt->rt_gw4 = 0; 1640d08c4f35SDavid Ahern INIT_LIST_HEAD(&rt->rt_uncached); 1641d08c4f35SDavid Ahern 1642d08c4f35SDavid Ahern rt->dst.output = ip_output; 1643d08c4f35SDavid Ahern if (flags & RTCF_LOCAL) 1644d08c4f35SDavid Ahern rt->dst.input = ip_local_deliver; 1645d08c4f35SDavid Ahern } 1646d08c4f35SDavid Ahern 1647d08c4f35SDavid Ahern return rt; 16480c4dcd58SDavid S. Miller } 16499ab179d8SDavid Ahern EXPORT_SYMBOL(rt_dst_alloc); 16500c4dcd58SDavid S. Miller 165196d36220SEric Dumazet /* called in rcu_read_lock() section */ 1652bc044e8dSPaolo Abeni int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1653bc044e8dSPaolo Abeni u8 tos, struct net_device *dev, 1654bc044e8dSPaolo Abeni struct in_device *in_dev, u32 *itag) 16551da177e4SLinus Torvalds { 1656b5f7e755SEric Dumazet int err; 16571da177e4SLinus Torvalds 16581da177e4SLinus Torvalds /* Primary sanity checks. */ 165951456b29SIan Morris if (!in_dev) 16601da177e4SLinus Torvalds return -EINVAL; 16611da177e4SLinus Torvalds 16621e637c74SJan Engelhardt if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 1663d0daebc3SThomas Graf skb->protocol != htons(ETH_P_IP)) 1664bc044e8dSPaolo Abeni return -EINVAL; 1665d0daebc3SThomas Graf 166675fea73dSAlexander Duyck if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) 1667bc044e8dSPaolo Abeni return -EINVAL; 16681da177e4SLinus Torvalds 1669f97c1e0cSJoe Perches if (ipv4_is_zeronet(saddr)) { 16701d2f4ebbSEdward Chron if (!ipv4_is_local_multicast(daddr) && 16711d2f4ebbSEdward Chron ip_hdr(skb)->protocol != IPPROTO_IGMP) 1672bc044e8dSPaolo Abeni return -EINVAL; 1673b5f7e755SEric Dumazet } else { 16749e56e380SDavid S. Miller err = fib_validate_source(skb, saddr, 0, tos, 0, dev, 1675bc044e8dSPaolo Abeni in_dev, itag); 1676b5f7e755SEric Dumazet if (err < 0) 1677bc044e8dSPaolo Abeni return err; 1678b5f7e755SEric Dumazet } 1679bc044e8dSPaolo Abeni return 0; 1680bc044e8dSPaolo Abeni } 1681bc044e8dSPaolo Abeni 1682bc044e8dSPaolo Abeni /* called in rcu_read_lock() section */ 1683bc044e8dSPaolo Abeni static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1684bc044e8dSPaolo Abeni u8 tos, struct net_device *dev, int our) 1685bc044e8dSPaolo Abeni { 1686bc044e8dSPaolo Abeni struct in_device *in_dev = __in_dev_get_rcu(dev); 1687bc044e8dSPaolo Abeni unsigned int flags = RTCF_MULTICAST; 1688bc044e8dSPaolo Abeni struct rtable *rth; 1689bc044e8dSPaolo Abeni u32 itag = 0; 1690bc044e8dSPaolo Abeni int err; 1691bc044e8dSPaolo Abeni 1692bc044e8dSPaolo Abeni err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); 1693bc044e8dSPaolo Abeni if (err) 1694bc044e8dSPaolo Abeni return err; 1695bc044e8dSPaolo Abeni 1696d08c4f35SDavid Ahern if (our) 1697d08c4f35SDavid Ahern flags |= RTCF_LOCAL; 1698d08c4f35SDavid Ahern 1699d08c4f35SDavid Ahern rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, 1700f2bb4bedSDavid S. Miller IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); 17011da177e4SLinus Torvalds if (!rth) 1702bc044e8dSPaolo Abeni return -ENOBUFS; 17031da177e4SLinus Torvalds 1704c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 1705d8d1f30bSChangli Gao rth->dst.tclassid = itag; 17061da177e4SLinus Torvalds #endif 1707cf911662SDavid S. Miller rth->dst.output = ip_rt_bug; 17089917e1e8SDavid S. Miller rth->rt_is_input= 1; 17091da177e4SLinus Torvalds 17101da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE 1711f97c1e0cSJoe Perches if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) 1712d8d1f30bSChangli Gao rth->dst.input = ip_mr_input; 17131da177e4SLinus Torvalds #endif 17141da177e4SLinus Torvalds RT_CACHE_STAT_INC(in_slow_mc); 17151da177e4SLinus Torvalds 171689aef892SDavid S. Miller skb_dst_set(skb, &rth->dst); 171789aef892SDavid S. Miller return 0; 17181da177e4SLinus Torvalds } 17191da177e4SLinus Torvalds 17201da177e4SLinus Torvalds 17211da177e4SLinus Torvalds static void ip_handle_martian_source(struct net_device *dev, 17221da177e4SLinus Torvalds struct in_device *in_dev, 17231da177e4SLinus Torvalds struct sk_buff *skb, 17249e12bb22SAl Viro __be32 daddr, 17259e12bb22SAl Viro __be32 saddr) 17261da177e4SLinus Torvalds { 17271da177e4SLinus Torvalds RT_CACHE_STAT_INC(in_martian_src); 17281da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE 17291da177e4SLinus Torvalds if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { 17301da177e4SLinus Torvalds /* 17311da177e4SLinus Torvalds * RFC1812 recommendation, if source is martian, 17321da177e4SLinus Torvalds * the only hint is MAC header. 17331da177e4SLinus Torvalds */ 1734058bd4d2SJoe Perches pr_warn("martian source %pI4 from %pI4, on dev %s\n", 1735673d57e7SHarvey Harrison &daddr, &saddr, dev->name); 173698e399f8SArnaldo Carvalho de Melo if (dev->hard_header_len && skb_mac_header_was_set(skb)) { 1737058bd4d2SJoe Perches print_hex_dump(KERN_WARNING, "ll header: ", 1738058bd4d2SJoe Perches DUMP_PREFIX_OFFSET, 16, 1, 1739058bd4d2SJoe Perches skb_mac_header(skb), 1740b2c85100SDavid S. Miller dev->hard_header_len, false); 17411da177e4SLinus Torvalds } 17421da177e4SLinus Torvalds } 17431da177e4SLinus Torvalds #endif 17441da177e4SLinus Torvalds } 17451da177e4SLinus Torvalds 174647360228SEric Dumazet /* called in rcu_read_lock() section */ 17475969f71dSStephen Hemminger static int __mkroute_input(struct sk_buff *skb, 1748982721f3SDavid S. Miller const struct fib_result *res, 17491da177e4SLinus Torvalds struct in_device *in_dev, 1750c6cffba4SDavid S. Miller __be32 daddr, __be32 saddr, u32 tos) 17511da177e4SLinus Torvalds { 1752eba618abSDavid Ahern struct fib_nh_common *nhc = FIB_RES_NHC(*res); 1753eba618abSDavid Ahern struct net_device *dev = nhc->nhc_dev; 17542ffae99dSTimo Teräs struct fib_nh_exception *fnhe; 17551da177e4SLinus Torvalds struct rtable *rth; 17561da177e4SLinus Torvalds int err; 17571da177e4SLinus Torvalds struct in_device *out_dev; 1758d2d68ba9SDavid S. Miller bool do_cache; 1759fbdc0ad0SLi RongQing u32 itag = 0; 17601da177e4SLinus Torvalds 17611da177e4SLinus Torvalds /* get a working reference to the output device */ 1762eba618abSDavid Ahern out_dev = __in_dev_get_rcu(dev); 176351456b29SIan Morris if (!out_dev) { 1764e87cc472SJoe Perches net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); 17651da177e4SLinus Torvalds return -EINVAL; 17661da177e4SLinus Torvalds } 17671da177e4SLinus Torvalds 17685c04c819SMichael Smith err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), 17699e56e380SDavid S. Miller in_dev->dev, in_dev, &itag); 17701da177e4SLinus Torvalds if (err < 0) { 17711da177e4SLinus Torvalds ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 17721da177e4SLinus Torvalds saddr); 17731da177e4SLinus Torvalds 17741da177e4SLinus Torvalds goto cleanup; 17751da177e4SLinus Torvalds } 17761da177e4SLinus Torvalds 1777e81da0e1SJulian Anastasov do_cache = res->fi && !itag; 1778e81da0e1SJulian Anastasov if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && 1779eba618abSDavid Ahern skb->protocol == htons(ETH_P_IP)) { 1780bdf00467SDavid Ahern __be32 gw; 1781eba618abSDavid Ahern 1782bdf00467SDavid Ahern gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; 1783eba618abSDavid Ahern if (IN_DEV_SHARED_MEDIA(out_dev) || 1784eba618abSDavid Ahern inet_addr_onlink(out_dev, saddr, gw)) 1785df4d9254SHannes Frederic Sowa IPCB(skb)->flags |= IPSKB_DOREDIRECT; 1786eba618abSDavid Ahern } 17871da177e4SLinus Torvalds 17881da177e4SLinus Torvalds if (skb->protocol != htons(ETH_P_IP)) { 17891da177e4SLinus Torvalds /* Not IP (i.e. ARP). Do not create route, if it is 17901da177e4SLinus Torvalds * invalid for proxy arp. DNAT routes are always valid. 179165324144SJesper Dangaard Brouer * 179265324144SJesper Dangaard Brouer * Proxy arp feature have been extended to allow, ARP 179365324144SJesper Dangaard Brouer * replies back to the same interface, to support 179465324144SJesper Dangaard Brouer * Private VLAN switch technologies. See arp.c. 17951da177e4SLinus Torvalds */ 179665324144SJesper Dangaard Brouer if (out_dev == in_dev && 179765324144SJesper Dangaard Brouer IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { 17981da177e4SLinus Torvalds err = -EINVAL; 17991da177e4SLinus Torvalds goto cleanup; 18001da177e4SLinus Torvalds } 18011da177e4SLinus Torvalds } 18021da177e4SLinus Torvalds 1803a5995e71SDavid Ahern fnhe = find_exception(nhc, daddr); 1804e81da0e1SJulian Anastasov if (do_cache) { 180594720e3aSJulian Anastasov if (fnhe) 18062ffae99dSTimo Teräs rth = rcu_dereference(fnhe->fnhe_rth_input); 180794720e3aSJulian Anastasov else 18080f457a36SDavid Ahern rth = rcu_dereference(nhc->nhc_rth_input); 1809d2d68ba9SDavid S. Miller if (rt_cache_valid(rth)) { 1810c6cffba4SDavid S. Miller skb_dst_set_noref(skb, &rth->dst); 1811d2d68ba9SDavid S. Miller goto out; 1812d2d68ba9SDavid S. Miller } 1813d2d68ba9SDavid S. Miller } 1814f2bb4bedSDavid S. Miller 1815d08c4f35SDavid Ahern rth = rt_dst_alloc(out_dev->dev, 0, res->type, 18165c1e6aa3SDavid S. Miller IN_DEV_CONF_GET(in_dev, NOPOLICY), 1817d2d68ba9SDavid S. Miller IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); 18181da177e4SLinus Torvalds if (!rth) { 18191da177e4SLinus Torvalds err = -ENOBUFS; 18201da177e4SLinus Torvalds goto cleanup; 18211da177e4SLinus Torvalds } 18221da177e4SLinus Torvalds 18239917e1e8SDavid S. Miller rth->rt_is_input = 1; 1824a6254864SDuan Jiong RT_CACHE_STAT_INC(in_slow_tot); 18251da177e4SLinus Torvalds 1826d8d1f30bSChangli Gao rth->dst.input = ip_forward; 18271da177e4SLinus Torvalds 1828a4c2fd7fSWei Wang rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag, 1829a4c2fd7fSWei Wang do_cache); 18309942895bSDavid Ahern lwtunnel_set_redirect(&rth->dst); 1831c6cffba4SDavid S. Miller skb_dst_set(skb, &rth->dst); 1832d2d68ba9SDavid S. Miller out: 18331da177e4SLinus Torvalds err = 0; 18341da177e4SLinus Torvalds cleanup: 18351da177e4SLinus Torvalds return err; 18361da177e4SLinus Torvalds } 18371da177e4SLinus Torvalds 183879a13159SPeter Nørlund #ifdef CONFIG_IP_ROUTE_MULTIPATH 183979a13159SPeter Nørlund /* To make ICMP packets follow the right flow, the multipath hash is 1840bf4e0a3dSNikolay Aleksandrov * calculated from the inner IP addresses. 184179a13159SPeter Nørlund */ 1842bf4e0a3dSNikolay Aleksandrov static void ip_multipath_l3_keys(const struct sk_buff *skb, 1843bf4e0a3dSNikolay Aleksandrov struct flow_keys *hash_keys) 184479a13159SPeter Nørlund { 184579a13159SPeter Nørlund const struct iphdr *outer_iph = ip_hdr(skb); 18466f74b6c2SDavid Ahern const struct iphdr *key_iph = outer_iph; 1847bf4e0a3dSNikolay Aleksandrov const struct iphdr *inner_iph; 184879a13159SPeter Nørlund const struct icmphdr *icmph; 184979a13159SPeter Nørlund struct iphdr _inner_iph; 1850bf4e0a3dSNikolay Aleksandrov struct icmphdr _icmph; 1851bf4e0a3dSNikolay Aleksandrov 1852bf4e0a3dSNikolay Aleksandrov if (likely(outer_iph->protocol != IPPROTO_ICMP)) 18536f74b6c2SDavid Ahern goto out; 185479a13159SPeter Nørlund 185579a13159SPeter Nørlund if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) 18566f74b6c2SDavid Ahern goto out; 185779a13159SPeter Nørlund 185879a13159SPeter Nørlund icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), 185979a13159SPeter Nørlund &_icmph); 186079a13159SPeter Nørlund if (!icmph) 18616f74b6c2SDavid Ahern goto out; 186279a13159SPeter Nørlund 186379a13159SPeter Nørlund if (icmph->type != ICMP_DEST_UNREACH && 186479a13159SPeter Nørlund icmph->type != ICMP_REDIRECT && 186579a13159SPeter Nørlund icmph->type != ICMP_TIME_EXCEEDED && 1866bf4e0a3dSNikolay Aleksandrov icmph->type != ICMP_PARAMETERPROB) 18676f74b6c2SDavid Ahern goto out; 186879a13159SPeter Nørlund 186979a13159SPeter Nørlund inner_iph = skb_header_pointer(skb, 187079a13159SPeter Nørlund outer_iph->ihl * 4 + sizeof(_icmph), 187179a13159SPeter Nørlund sizeof(_inner_iph), &_inner_iph); 187279a13159SPeter Nørlund if (!inner_iph) 18736f74b6c2SDavid Ahern goto out; 18746f74b6c2SDavid Ahern 18756f74b6c2SDavid Ahern key_iph = inner_iph; 18766f74b6c2SDavid Ahern out: 18776f74b6c2SDavid Ahern hash_keys->addrs.v4addrs.src = key_iph->saddr; 18786f74b6c2SDavid Ahern hash_keys->addrs.v4addrs.dst = key_iph->daddr; 187979a13159SPeter Nørlund } 188079a13159SPeter Nørlund 1881bf4e0a3dSNikolay Aleksandrov /* if skb is set it will be used and fl4 can be NULL */ 18827efc0b6bSDavid Ahern int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, 1883e37b1e97SRoopa Prabhu const struct sk_buff *skb, struct flow_keys *flkeys) 1884bf4e0a3dSNikolay Aleksandrov { 18852a8e4997SIdo Schimmel u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0; 1886bf4e0a3dSNikolay Aleksandrov struct flow_keys hash_keys; 1887bf4e0a3dSNikolay Aleksandrov u32 mhash; 1888bf4e0a3dSNikolay Aleksandrov 1889bf4e0a3dSNikolay Aleksandrov switch (net->ipv4.sysctl_fib_multipath_hash_policy) { 1890bf4e0a3dSNikolay Aleksandrov case 0: 1891bf4e0a3dSNikolay Aleksandrov memset(&hash_keys, 0, sizeof(hash_keys)); 1892bf4e0a3dSNikolay Aleksandrov hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1893bf4e0a3dSNikolay Aleksandrov if (skb) { 1894bf4e0a3dSNikolay Aleksandrov ip_multipath_l3_keys(skb, &hash_keys); 1895bf4e0a3dSNikolay Aleksandrov } else { 1896bf4e0a3dSNikolay Aleksandrov hash_keys.addrs.v4addrs.src = fl4->saddr; 1897bf4e0a3dSNikolay Aleksandrov hash_keys.addrs.v4addrs.dst = fl4->daddr; 1898bf4e0a3dSNikolay Aleksandrov } 1899bf4e0a3dSNikolay Aleksandrov break; 1900bf4e0a3dSNikolay Aleksandrov case 1: 1901bf4e0a3dSNikolay Aleksandrov /* skb is currently provided only when forwarding */ 1902bf4e0a3dSNikolay Aleksandrov if (skb) { 1903bf4e0a3dSNikolay Aleksandrov unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; 1904bf4e0a3dSNikolay Aleksandrov struct flow_keys keys; 1905bf4e0a3dSNikolay Aleksandrov 1906bf4e0a3dSNikolay Aleksandrov /* short-circuit if we already have L4 hash present */ 1907bf4e0a3dSNikolay Aleksandrov if (skb->l4_hash) 1908bf4e0a3dSNikolay Aleksandrov return skb_get_hash_raw(skb) >> 1; 1909ec7127a5SDavid Ahern 1910bf4e0a3dSNikolay Aleksandrov memset(&hash_keys, 0, sizeof(hash_keys)); 19111fe4b118SDavid Ahern 1912ec7127a5SDavid Ahern if (!flkeys) { 1913ec7127a5SDavid Ahern skb_flow_dissect_flow_keys(skb, &keys, flag); 1914ec7127a5SDavid Ahern flkeys = &keys; 1915ec7127a5SDavid Ahern } 1916ec7127a5SDavid Ahern 1917e37b1e97SRoopa Prabhu hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1918e37b1e97SRoopa Prabhu hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; 1919e37b1e97SRoopa Prabhu hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; 1920e37b1e97SRoopa Prabhu hash_keys.ports.src = flkeys->ports.src; 1921e37b1e97SRoopa Prabhu hash_keys.ports.dst = flkeys->ports.dst; 1922e37b1e97SRoopa Prabhu hash_keys.basic.ip_proto = flkeys->basic.ip_proto; 1923e37b1e97SRoopa Prabhu } else { 1924bf4e0a3dSNikolay Aleksandrov memset(&hash_keys, 0, sizeof(hash_keys)); 1925bf4e0a3dSNikolay Aleksandrov hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1926bf4e0a3dSNikolay Aleksandrov hash_keys.addrs.v4addrs.src = fl4->saddr; 1927bf4e0a3dSNikolay Aleksandrov hash_keys.addrs.v4addrs.dst = fl4->daddr; 1928bf4e0a3dSNikolay Aleksandrov hash_keys.ports.src = fl4->fl4_sport; 1929bf4e0a3dSNikolay Aleksandrov hash_keys.ports.dst = fl4->fl4_dport; 1930bf4e0a3dSNikolay Aleksandrov hash_keys.basic.ip_proto = fl4->flowi4_proto; 1931bf4e0a3dSNikolay Aleksandrov } 1932bf4e0a3dSNikolay Aleksandrov break; 1933363887a2SStephen Suryaputra case 2: 1934363887a2SStephen Suryaputra memset(&hash_keys, 0, sizeof(hash_keys)); 1935363887a2SStephen Suryaputra hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; 1936363887a2SStephen Suryaputra /* skb is currently provided only when forwarding */ 1937363887a2SStephen Suryaputra if (skb) { 1938363887a2SStephen Suryaputra struct flow_keys keys; 1939363887a2SStephen Suryaputra 1940363887a2SStephen Suryaputra skb_flow_dissect_flow_keys(skb, &keys, 0); 1941363887a2SStephen Suryaputra 1942363887a2SStephen Suryaputra hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; 1943363887a2SStephen Suryaputra hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; 1944363887a2SStephen Suryaputra } else { 1945363887a2SStephen Suryaputra /* Same as case 0 */ 1946363887a2SStephen Suryaputra hash_keys.addrs.v4addrs.src = fl4->saddr; 1947363887a2SStephen Suryaputra hash_keys.addrs.v4addrs.dst = fl4->daddr; 1948363887a2SStephen Suryaputra } 1949363887a2SStephen Suryaputra break; 1950bf4e0a3dSNikolay Aleksandrov } 1951bf4e0a3dSNikolay Aleksandrov mhash = flow_hash_from_keys(&hash_keys); 1952bf4e0a3dSNikolay Aleksandrov 195324ba1440Swenxu if (multipath_hash) 195424ba1440Swenxu mhash = jhash_2words(mhash, multipath_hash, 0); 195524ba1440Swenxu 1956bf4e0a3dSNikolay Aleksandrov return mhash >> 1; 1957bf4e0a3dSNikolay Aleksandrov } 195879a13159SPeter Nørlund #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 195979a13159SPeter Nørlund 19605969f71dSStephen Hemminger static int ip_mkroute_input(struct sk_buff *skb, 19611da177e4SLinus Torvalds struct fib_result *res, 19621da177e4SLinus Torvalds struct in_device *in_dev, 1963e37b1e97SRoopa Prabhu __be32 daddr, __be32 saddr, u32 tos, 1964e37b1e97SRoopa Prabhu struct flow_keys *hkeys) 19651da177e4SLinus Torvalds { 19661da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH 19675481d73fSDavid Ahern if (res->fi && fib_info_num_path(res->fi) > 1) { 19687efc0b6bSDavid Ahern int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); 19690e884c78SPeter Nørlund 19700e884c78SPeter Nørlund fib_select_multipath(res, h); 19710e884c78SPeter Nørlund } 19721da177e4SLinus Torvalds #endif 19731da177e4SLinus Torvalds 19741da177e4SLinus Torvalds /* create a routing cache entry */ 1975c6cffba4SDavid S. Miller return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); 19761da177e4SLinus Torvalds } 19771da177e4SLinus Torvalds 19781da177e4SLinus Torvalds /* 19791da177e4SLinus Torvalds * NOTE. We drop all the packets that has local source 19801da177e4SLinus Torvalds * addresses, because every properly looped back packet 19811da177e4SLinus Torvalds * must have correct destination already attached by output routine. 19821da177e4SLinus Torvalds * 19831da177e4SLinus Torvalds * Such approach solves two big problems: 19841da177e4SLinus Torvalds * 1. Not simplex devices are handled properly. 19851da177e4SLinus Torvalds * 2. IP spoofing attempts are filtered with 100% of guarantee. 1986ebc0ffaeSEric Dumazet * called with rcu_read_lock() 19871da177e4SLinus Torvalds */ 19881da177e4SLinus Torvalds 19899e12bb22SAl Viro static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, 19905510cdf7SDavid Ahern u8 tos, struct net_device *dev, 19915510cdf7SDavid Ahern struct fib_result *res) 19921da177e4SLinus Torvalds { 199396d36220SEric Dumazet struct in_device *in_dev = __in_dev_get_rcu(dev); 1994e37b1e97SRoopa Prabhu struct flow_keys *flkeys = NULL, _flkeys; 1995e37b1e97SRoopa Prabhu struct net *net = dev_net(dev); 19961b7179d3SThomas Graf struct ip_tunnel_info *tun_info; 1997e37b1e97SRoopa Prabhu int err = -EINVAL; 199895c96174SEric Dumazet unsigned int flags = 0; 19991da177e4SLinus Torvalds u32 itag = 0; 20001da177e4SLinus Torvalds struct rtable *rth; 2001e37b1e97SRoopa Prabhu struct flowi4 fl4; 20020a90478bSXin Long bool do_cache = true; 20031da177e4SLinus Torvalds 20041da177e4SLinus Torvalds /* IP on this device is disabled. */ 20051da177e4SLinus Torvalds 20061da177e4SLinus Torvalds if (!in_dev) 20071da177e4SLinus Torvalds goto out; 20081da177e4SLinus Torvalds 20091da177e4SLinus Torvalds /* Check for the most weird martians, which can be not detected 20101da177e4SLinus Torvalds by fib_lookup. 20111da177e4SLinus Torvalds */ 20121da177e4SLinus Torvalds 201361adedf3SJiri Benc tun_info = skb_tunnel_info(skb); 201446fa062aSJiri Benc if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) 20151b7179d3SThomas Graf fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id; 20161b7179d3SThomas Graf else 20171b7179d3SThomas Graf fl4.flowi4_tun_key.tun_id = 0; 2018f38a9eb1SThomas Graf skb_dst_drop(skb); 2019f38a9eb1SThomas Graf 2020d0daebc3SThomas Graf if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) 20211da177e4SLinus Torvalds goto martian_source; 20221da177e4SLinus Torvalds 20235510cdf7SDavid Ahern res->fi = NULL; 20245510cdf7SDavid Ahern res->table = NULL; 202527a954bdSAndy Walls if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) 20261da177e4SLinus Torvalds goto brd_input; 20271da177e4SLinus Torvalds 20281da177e4SLinus Torvalds /* Accept zero addresses only to limited broadcast; 20291da177e4SLinus Torvalds * I even do not know to fix it or not. Waiting for complains :-) 20301da177e4SLinus Torvalds */ 2031f97c1e0cSJoe Perches if (ipv4_is_zeronet(saddr)) 20321da177e4SLinus Torvalds goto martian_source; 20331da177e4SLinus Torvalds 2034d0daebc3SThomas Graf if (ipv4_is_zeronet(daddr)) 20351da177e4SLinus Torvalds goto martian_destination; 20361da177e4SLinus Torvalds 20379eb43e76SEric Dumazet /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), 20389eb43e76SEric Dumazet * and call it once if daddr or/and saddr are loopback addresses 20399eb43e76SEric Dumazet */ 20409eb43e76SEric Dumazet if (ipv4_is_loopback(daddr)) { 20419eb43e76SEric Dumazet if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) 2042d0daebc3SThomas Graf goto martian_destination; 20439eb43e76SEric Dumazet } else if (ipv4_is_loopback(saddr)) { 20449eb43e76SEric Dumazet if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) 2045d0daebc3SThomas Graf goto martian_source; 2046d0daebc3SThomas Graf } 2047d0daebc3SThomas Graf 20481da177e4SLinus Torvalds /* 20491da177e4SLinus Torvalds * Now we are ready to route packet. 20501da177e4SLinus Torvalds */ 205168a5e3ddSDavid S. Miller fl4.flowi4_oif = 0; 2052e0d56fddSDavid Ahern fl4.flowi4_iif = dev->ifindex; 205368a5e3ddSDavid S. Miller fl4.flowi4_mark = skb->mark; 205468a5e3ddSDavid S. Miller fl4.flowi4_tos = tos; 205568a5e3ddSDavid S. Miller fl4.flowi4_scope = RT_SCOPE_UNIVERSE; 2056b84f7878SDavid Ahern fl4.flowi4_flags = 0; 205768a5e3ddSDavid S. Miller fl4.daddr = daddr; 205868a5e3ddSDavid S. Miller fl4.saddr = saddr; 20598bcfd092SJulian Anastasov fl4.flowi4_uid = sock_net_uid(net, NULL); 2060e37b1e97SRoopa Prabhu 20615a847a6eSDavid Ahern if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { 2062e37b1e97SRoopa Prabhu flkeys = &_flkeys; 20635a847a6eSDavid Ahern } else { 20645a847a6eSDavid Ahern fl4.flowi4_proto = 0; 20655a847a6eSDavid Ahern fl4.fl4_sport = 0; 20665a847a6eSDavid Ahern fl4.fl4_dport = 0; 20675a847a6eSDavid Ahern } 2068e37b1e97SRoopa Prabhu 20695510cdf7SDavid Ahern err = fib_lookup(net, &fl4, res, 0); 2070cd0f0b95SDuan Jiong if (err != 0) { 2071cd0f0b95SDuan Jiong if (!IN_DEV_FORWARD(in_dev)) 2072cd0f0b95SDuan Jiong err = -EHOSTUNREACH; 20731da177e4SLinus Torvalds goto no_route; 2074cd0f0b95SDuan Jiong } 20751da177e4SLinus Torvalds 20765cbf777cSXin Long if (res->type == RTN_BROADCAST) { 20775cbf777cSXin Long if (IN_DEV_BFORWARD(in_dev)) 20785cbf777cSXin Long goto make_route; 20790a90478bSXin Long /* not do cache if bc_forwarding is enabled */ 20800a90478bSXin Long if (IPV4_DEVCONF_ALL(net, BC_FORWARDING)) 20810a90478bSXin Long do_cache = false; 20821da177e4SLinus Torvalds goto brd_input; 20835cbf777cSXin Long } 20841da177e4SLinus Torvalds 20855510cdf7SDavid Ahern if (res->type == RTN_LOCAL) { 20865c04c819SMichael Smith err = fib_validate_source(skb, saddr, daddr, tos, 20870d5edc68SCong Wang 0, dev, in_dev, &itag); 2088b5f7e755SEric Dumazet if (err < 0) 20890d753960SDavid Ahern goto martian_source; 20901da177e4SLinus Torvalds goto local_input; 20911da177e4SLinus Torvalds } 20921da177e4SLinus Torvalds 2093cd0f0b95SDuan Jiong if (!IN_DEV_FORWARD(in_dev)) { 2094cd0f0b95SDuan Jiong err = -EHOSTUNREACH; 2095251da413SDavid S. Miller goto no_route; 2096cd0f0b95SDuan Jiong } 20975510cdf7SDavid Ahern if (res->type != RTN_UNICAST) 20981da177e4SLinus Torvalds goto martian_destination; 20991da177e4SLinus Torvalds 21005cbf777cSXin Long make_route: 2101e37b1e97SRoopa Prabhu err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); 21021da177e4SLinus Torvalds out: return err; 21031da177e4SLinus Torvalds 21041da177e4SLinus Torvalds brd_input: 21051da177e4SLinus Torvalds if (skb->protocol != htons(ETH_P_IP)) 21061da177e4SLinus Torvalds goto e_inval; 21071da177e4SLinus Torvalds 210841347dcdSDavid S. Miller if (!ipv4_is_zeronet(saddr)) { 21099e56e380SDavid S. Miller err = fib_validate_source(skb, saddr, 0, tos, 0, dev, 21109e56e380SDavid S. Miller in_dev, &itag); 21111da177e4SLinus Torvalds if (err < 0) 21120d753960SDavid Ahern goto martian_source; 21131da177e4SLinus Torvalds } 21141da177e4SLinus Torvalds flags |= RTCF_BROADCAST; 21155510cdf7SDavid Ahern res->type = RTN_BROADCAST; 21161da177e4SLinus Torvalds RT_CACHE_STAT_INC(in_brd); 21171da177e4SLinus Torvalds 21181da177e4SLinus Torvalds local_input: 21190a90478bSXin Long do_cache &= res->fi && !itag; 21200a90478bSXin Long if (do_cache) { 2121eba618abSDavid Ahern struct fib_nh_common *nhc = FIB_RES_NHC(*res); 2122eba618abSDavid Ahern 21230f457a36SDavid Ahern rth = rcu_dereference(nhc->nhc_rth_input); 2124d2d68ba9SDavid S. Miller if (rt_cache_valid(rth)) { 2125c6cffba4SDavid S. Miller skb_dst_set_noref(skb, &rth->dst); 2126c6cffba4SDavid S. Miller err = 0; 2127c6cffba4SDavid S. Miller goto out; 2128d2d68ba9SDavid S. Miller } 2129d2d68ba9SDavid S. Miller } 2130d2d68ba9SDavid S. Miller 2131f5a0aab8SDavid Ahern rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, 21325510cdf7SDavid Ahern flags | RTCF_LOCAL, res->type, 2133d2d68ba9SDavid S. Miller IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); 21341da177e4SLinus Torvalds if (!rth) 21351da177e4SLinus Torvalds goto e_nobufs; 21361da177e4SLinus Torvalds 2137d8d1f30bSChangli Gao rth->dst.output= ip_rt_bug; 2138cf911662SDavid S. Miller #ifdef CONFIG_IP_ROUTE_CLASSID 2139cf911662SDavid S. Miller rth->dst.tclassid = itag; 2140cf911662SDavid S. Miller #endif 21419917e1e8SDavid S. Miller rth->rt_is_input = 1; 2142571e7226SRoopa Prabhu 2143a6254864SDuan Jiong RT_CACHE_STAT_INC(in_slow_tot); 21445510cdf7SDavid Ahern if (res->type == RTN_UNREACHABLE) { 2145d8d1f30bSChangli Gao rth->dst.input= ip_error; 2146d8d1f30bSChangli Gao rth->dst.error= -err; 21471da177e4SLinus Torvalds rth->rt_flags &= ~RTCF_LOCAL; 21481da177e4SLinus Torvalds } 2149efd85700SThomas Graf 2150dcdfdf56SAlexei Starovoitov if (do_cache) { 2151eba618abSDavid Ahern struct fib_nh_common *nhc = FIB_RES_NHC(*res); 2152efd85700SThomas Graf 2153eba618abSDavid Ahern rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); 2154efd85700SThomas Graf if (lwtunnel_input_redirect(rth->dst.lwtstate)) { 2155efd85700SThomas Graf WARN_ON(rth->dst.input == lwtunnel_input); 2156efd85700SThomas Graf rth->dst.lwtstate->orig_input = rth->dst.input; 2157efd85700SThomas Graf rth->dst.input = lwtunnel_input; 2158efd85700SThomas Graf } 2159efd85700SThomas Graf 216087063a1fSDavid Ahern if (unlikely(!rt_cache_route(nhc, rth))) 2161dcdfdf56SAlexei Starovoitov rt_add_uncached_list(rth); 2162dcdfdf56SAlexei Starovoitov } 216389aef892SDavid S. Miller skb_dst_set(skb, &rth->dst); 2164b23dd4feSDavid S. Miller err = 0; 2165ebc0ffaeSEric Dumazet goto out; 21661da177e4SLinus Torvalds 21671da177e4SLinus Torvalds no_route: 21681da177e4SLinus Torvalds RT_CACHE_STAT_INC(in_no_route); 21695510cdf7SDavid Ahern res->type = RTN_UNREACHABLE; 21705510cdf7SDavid Ahern res->fi = NULL; 21715510cdf7SDavid Ahern res->table = NULL; 21721da177e4SLinus Torvalds goto local_input; 21731da177e4SLinus Torvalds 21741da177e4SLinus Torvalds /* 21751da177e4SLinus Torvalds * Do not cache martian addresses: they should be logged (RFC1812) 21761da177e4SLinus Torvalds */ 21771da177e4SLinus Torvalds martian_destination: 21781da177e4SLinus Torvalds RT_CACHE_STAT_INC(in_martian_dst); 21791da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE 2180e87cc472SJoe Perches if (IN_DEV_LOG_MARTIANS(in_dev)) 2181e87cc472SJoe Perches net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", 2182673d57e7SHarvey Harrison &daddr, &saddr, dev->name); 21831da177e4SLinus Torvalds #endif 21842c2910a4SDietmar Eggemann 21851da177e4SLinus Torvalds e_inval: 21861da177e4SLinus Torvalds err = -EINVAL; 2187ebc0ffaeSEric Dumazet goto out; 21881da177e4SLinus Torvalds 21891da177e4SLinus Torvalds e_nobufs: 21901da177e4SLinus Torvalds err = -ENOBUFS; 2191ebc0ffaeSEric Dumazet goto out; 21921da177e4SLinus Torvalds 21931da177e4SLinus Torvalds martian_source: 21941da177e4SLinus Torvalds ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 2195ebc0ffaeSEric Dumazet goto out; 21961da177e4SLinus Torvalds } 21971da177e4SLinus Torvalds 2198c6cffba4SDavid S. Miller int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, 219938a424e4SDavid Miller u8 tos, struct net_device *dev) 22001da177e4SLinus Torvalds { 22015510cdf7SDavid Ahern struct fib_result res; 22025510cdf7SDavid Ahern int err; 22031da177e4SLinus Torvalds 22046e28099dSJulian Anastasov tos &= IPTOS_RT_MASK; 220596d36220SEric Dumazet rcu_read_lock(); 22065510cdf7SDavid Ahern err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res); 22075510cdf7SDavid Ahern rcu_read_unlock(); 220896d36220SEric Dumazet 22095510cdf7SDavid Ahern return err; 22105510cdf7SDavid Ahern } 22115510cdf7SDavid Ahern EXPORT_SYMBOL(ip_route_input_noref); 22125510cdf7SDavid Ahern 22135510cdf7SDavid Ahern /* called with rcu_read_lock held */ 22145510cdf7SDavid Ahern int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, 22155510cdf7SDavid Ahern u8 tos, struct net_device *dev, struct fib_result *res) 22165510cdf7SDavid Ahern { 22171da177e4SLinus Torvalds /* Multicast recognition logic is moved from route cache to here. 22181da177e4SLinus Torvalds The problem was that too many Ethernet cards have broken/missing 22191da177e4SLinus Torvalds hardware multicast filters :-( As result the host on multicasting 22201da177e4SLinus Torvalds network acquires a lot of useless route cache entries, sort of 22211da177e4SLinus Torvalds SDR messages from all the world. Now we try to get rid of them. 22221da177e4SLinus Torvalds Really, provided software IP multicast filter is organized 22231da177e4SLinus Torvalds reasonably (at least, hashed), it does not result in a slowdown 22241da177e4SLinus Torvalds comparing with route cache reject entries. 22251da177e4SLinus Torvalds Note, that multicast routers are not affected, because 22261da177e4SLinus Torvalds route cache entry is created eventually. 22271da177e4SLinus Torvalds */ 2228f97c1e0cSJoe Perches if (ipv4_is_multicast(daddr)) { 222996d36220SEric Dumazet struct in_device *in_dev = __in_dev_get_rcu(dev); 2230e58e4159SDavid Ahern int our = 0; 22315510cdf7SDavid Ahern int err = -EINVAL; 22321da177e4SLinus Torvalds 223322c74764SPaolo Abeni if (!in_dev) 223422c74764SPaolo Abeni return err; 2235e58e4159SDavid Ahern our = ip_check_mc_rcu(in_dev, daddr, saddr, 2236eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->protocol); 2237e58e4159SDavid Ahern 2238e58e4159SDavid Ahern /* check l3 master if no match yet */ 223922c74764SPaolo Abeni if (!our && netif_is_l3_slave(dev)) { 2240e58e4159SDavid Ahern struct in_device *l3_in_dev; 2241e58e4159SDavid Ahern 2242e58e4159SDavid Ahern l3_in_dev = __in_dev_get_rcu(skb->dev); 2243e58e4159SDavid Ahern if (l3_in_dev) 2244e58e4159SDavid Ahern our = ip_check_mc_rcu(l3_in_dev, daddr, saddr, 2245e58e4159SDavid Ahern ip_hdr(skb)->protocol); 2246e58e4159SDavid Ahern } 2247e58e4159SDavid Ahern 22481da177e4SLinus Torvalds if (our 22491da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE 22509d4fb27dSJoe Perches || 22519d4fb27dSJoe Perches (!ipv4_is_local_multicast(daddr) && 2252f97c1e0cSJoe Perches IN_DEV_MFORWARD(in_dev)) 22531da177e4SLinus Torvalds #endif 22541da177e4SLinus Torvalds ) { 22555510cdf7SDavid Ahern err = ip_route_input_mc(skb, daddr, saddr, 22561da177e4SLinus Torvalds tos, dev, our); 2257e58e4159SDavid Ahern } 22585510cdf7SDavid Ahern return err; 22591da177e4SLinus Torvalds } 22605510cdf7SDavid Ahern 22615510cdf7SDavid Ahern return ip_route_input_slow(skb, daddr, saddr, tos, dev, res); 22621da177e4SLinus Torvalds } 22631da177e4SLinus Torvalds 2264ebc0ffaeSEric Dumazet /* called with rcu_read_lock() */ 2265982721f3SDavid S. Miller static struct rtable *__mkroute_output(const struct fib_result *res, 22661a00fee4SDavid Miller const struct flowi4 *fl4, int orig_oif, 2267f61759e6SJulian Anastasov struct net_device *dev_out, 22685ada5527SDavid S. Miller unsigned int flags) 22691da177e4SLinus Torvalds { 2270982721f3SDavid S. Miller struct fib_info *fi = res->fi; 2271f2bb4bedSDavid S. Miller struct fib_nh_exception *fnhe; 22725ada5527SDavid S. Miller struct in_device *in_dev; 2273982721f3SDavid S. Miller u16 type = res->type; 22745ada5527SDavid S. Miller struct rtable *rth; 2275c92b9655SJulian Anastasov bool do_cache; 22761da177e4SLinus Torvalds 2277d0daebc3SThomas Graf in_dev = __in_dev_get_rcu(dev_out); 2278d0daebc3SThomas Graf if (!in_dev) 2279d0daebc3SThomas Graf return ERR_PTR(-EINVAL); 2280d0daebc3SThomas Graf 2281d0daebc3SThomas Graf if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) 22825f02ce24SDavid Ahern if (ipv4_is_loopback(fl4->saddr) && 22835f02ce24SDavid Ahern !(dev_out->flags & IFF_LOOPBACK) && 22845f02ce24SDavid Ahern !netif_is_l3_master(dev_out)) 22855ada5527SDavid S. Miller return ERR_PTR(-EINVAL); 22861da177e4SLinus Torvalds 228768a5e3ddSDavid S. Miller if (ipv4_is_lbcast(fl4->daddr)) 2288982721f3SDavid S. Miller type = RTN_BROADCAST; 228968a5e3ddSDavid S. Miller else if (ipv4_is_multicast(fl4->daddr)) 2290982721f3SDavid S. Miller type = RTN_MULTICAST; 229168a5e3ddSDavid S. Miller else if (ipv4_is_zeronet(fl4->daddr)) 22925ada5527SDavid S. Miller return ERR_PTR(-EINVAL); 22931da177e4SLinus Torvalds 22941da177e4SLinus Torvalds if (dev_out->flags & IFF_LOOPBACK) 22951da177e4SLinus Torvalds flags |= RTCF_LOCAL; 22961da177e4SLinus Torvalds 229763617421SJulian Anastasov do_cache = true; 2298982721f3SDavid S. Miller if (type == RTN_BROADCAST) { 22991da177e4SLinus Torvalds flags |= RTCF_BROADCAST | RTCF_LOCAL; 2300982721f3SDavid S. Miller fi = NULL; 2301982721f3SDavid S. Miller } else if (type == RTN_MULTICAST) { 23021da177e4SLinus Torvalds flags |= RTCF_MULTICAST | RTCF_LOCAL; 2303813b3b5dSDavid S. Miller if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, 2304813b3b5dSDavid S. Miller fl4->flowi4_proto)) 23051da177e4SLinus Torvalds flags &= ~RTCF_LOCAL; 230663617421SJulian Anastasov else 230763617421SJulian Anastasov do_cache = false; 23081da177e4SLinus Torvalds /* If multicast route do not exist use 2309dd28d1a0SEric Dumazet * default one, but do not gateway in this case. 2310dd28d1a0SEric Dumazet * Yes, it is hack. 23111da177e4SLinus Torvalds */ 2312982721f3SDavid S. Miller if (fi && res->prefixlen < 4) 2313982721f3SDavid S. Miller fi = NULL; 2314d6d5e999SChris Friesen } else if ((type == RTN_LOCAL) && (orig_oif != 0) && 2315d6d5e999SChris Friesen (orig_oif != dev_out->ifindex)) { 2316d6d5e999SChris Friesen /* For local routes that require a particular output interface 2317d6d5e999SChris Friesen * we do not want to cache the result. Caching the result 2318d6d5e999SChris Friesen * causes incorrect behaviour when there are multiple source 2319d6d5e999SChris Friesen * addresses on the interface, the end result being that if the 2320d6d5e999SChris Friesen * intended recipient is waiting on that interface for the 2321d6d5e999SChris Friesen * packet he won't receive it because it will be delivered on 2322d6d5e999SChris Friesen * the loopback interface and the IP_PKTINFO ipi_ifindex will 2323d6d5e999SChris Friesen * be set to the loopback interface as well. 2324d6d5e999SChris Friesen */ 232594720e3aSJulian Anastasov do_cache = false; 23261da177e4SLinus Torvalds } 23271da177e4SLinus Torvalds 2328f2bb4bedSDavid S. Miller fnhe = NULL; 232963617421SJulian Anastasov do_cache &= fi != NULL; 233094720e3aSJulian Anastasov if (fi) { 2331eba618abSDavid Ahern struct fib_nh_common *nhc = FIB_RES_NHC(*res); 2332d26b3a7cSEric Dumazet struct rtable __rcu **prth; 2333d26b3a7cSEric Dumazet 2334a5995e71SDavid Ahern fnhe = find_exception(nhc, fl4->daddr); 233594720e3aSJulian Anastasov if (!do_cache) 233694720e3aSJulian Anastasov goto add; 2337deed49dfSXin Long if (fnhe) { 23382ffae99dSTimo Teräs prth = &fnhe->fnhe_rth_output; 2339deed49dfSXin Long } else { 2340c92b9655SJulian Anastasov if (unlikely(fl4->flowi4_flags & 2341c92b9655SJulian Anastasov FLOWI_FLAG_KNOWN_NH && 2342bdf00467SDavid Ahern !(nhc->nhc_gw_family && 2343eba618abSDavid Ahern nhc->nhc_scope == RT_SCOPE_LINK))) { 2344c92b9655SJulian Anastasov do_cache = false; 2345c92b9655SJulian Anastasov goto add; 2346c92b9655SJulian Anastasov } 23470f457a36SDavid Ahern prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output); 234894720e3aSJulian Anastasov } 2349d26b3a7cSEric Dumazet rth = rcu_dereference(*prth); 23509df16efaSWei Wang if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) 2351f2bb4bedSDavid S. Miller return rth; 2352f2bb4bedSDavid S. Miller } 2353c92b9655SJulian Anastasov 2354c92b9655SJulian Anastasov add: 2355d08c4f35SDavid Ahern rth = rt_dst_alloc(dev_out, flags, type, 23565c1e6aa3SDavid S. Miller IN_DEV_CONF_GET(in_dev, NOPOLICY), 2357f2bb4bedSDavid S. Miller IN_DEV_CONF_GET(in_dev, NOXFRM), 2358c92b9655SJulian Anastasov do_cache); 23598391d07bSDimitris Michailidis if (!rth) 23605ada5527SDavid S. Miller return ERR_PTR(-ENOBUFS); 23618391d07bSDimitris Michailidis 23629438c871SDavid Ahern rth->rt_iif = orig_oif; 2363b7503e0cSDavid Ahern 23641da177e4SLinus Torvalds RT_CACHE_STAT_INC(out_slow_tot); 23651da177e4SLinus Torvalds 23661da177e4SLinus Torvalds if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 23671da177e4SLinus Torvalds if (flags & RTCF_LOCAL && 23681da177e4SLinus Torvalds !(dev_out->flags & IFF_LOOPBACK)) { 2369d8d1f30bSChangli Gao rth->dst.output = ip_mc_output; 23701da177e4SLinus Torvalds RT_CACHE_STAT_INC(out_slow_mc); 23711da177e4SLinus Torvalds } 23721da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE 2373982721f3SDavid S. Miller if (type == RTN_MULTICAST) { 23741da177e4SLinus Torvalds if (IN_DEV_MFORWARD(in_dev) && 2375813b3b5dSDavid S. Miller !ipv4_is_local_multicast(fl4->daddr)) { 2376d8d1f30bSChangli Gao rth->dst.input = ip_mr_input; 2377d8d1f30bSChangli Gao rth->dst.output = ip_mc_output; 23781da177e4SLinus Torvalds } 23791da177e4SLinus Torvalds } 23801da177e4SLinus Torvalds #endif 23811da177e4SLinus Torvalds } 23821da177e4SLinus Torvalds 2383a4c2fd7fSWei Wang rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache); 23849942895bSDavid Ahern lwtunnel_set_redirect(&rth->dst); 23851da177e4SLinus Torvalds 23865ada5527SDavid S. Miller return rth; 23871da177e4SLinus Torvalds } 23881da177e4SLinus Torvalds 23891da177e4SLinus Torvalds /* 23901da177e4SLinus Torvalds * Major route resolver routine. 23911da177e4SLinus Torvalds */ 23921da177e4SLinus Torvalds 23933abd1adeSDavid Ahern struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, 2394bf4e0a3dSNikolay Aleksandrov const struct sk_buff *skb) 23951da177e4SLinus Torvalds { 2396f61759e6SJulian Anastasov __u8 tos = RT_FL_TOS(fl4); 2397d0ea2b12SEric Dumazet struct fib_result res = { 2398d0ea2b12SEric Dumazet .type = RTN_UNSPEC, 2399d0ea2b12SEric Dumazet .fi = NULL, 2400d0ea2b12SEric Dumazet .table = NULL, 2401d0ea2b12SEric Dumazet .tclassid = 0, 2402d0ea2b12SEric Dumazet }; 24035ada5527SDavid S. Miller struct rtable *rth; 24041da177e4SLinus Torvalds 24051fb9489bSPavel Emelyanov fl4->flowi4_iif = LOOPBACK_IFINDEX; 2406813b3b5dSDavid S. Miller fl4->flowi4_tos = tos & IPTOS_RT_MASK; 2407813b3b5dSDavid S. Miller fl4->flowi4_scope = ((tos & RTO_ONLINK) ? 240844713b67SDavid S. Miller RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); 240944713b67SDavid S. Miller 2410010c2708SDavid S. Miller rcu_read_lock(); 24113abd1adeSDavid Ahern rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); 24123abd1adeSDavid Ahern rcu_read_unlock(); 24133abd1adeSDavid Ahern 24143abd1adeSDavid Ahern return rth; 24153abd1adeSDavid Ahern } 24163abd1adeSDavid Ahern EXPORT_SYMBOL_GPL(ip_route_output_key_hash); 24173abd1adeSDavid Ahern 24183abd1adeSDavid Ahern struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, 24193abd1adeSDavid Ahern struct fib_result *res, 24203abd1adeSDavid Ahern const struct sk_buff *skb) 24213abd1adeSDavid Ahern { 24223abd1adeSDavid Ahern struct net_device *dev_out = NULL; 24233abd1adeSDavid Ahern int orig_oif = fl4->flowi4_oif; 24243abd1adeSDavid Ahern unsigned int flags = 0; 24253abd1adeSDavid Ahern struct rtable *rth; 24263abd1adeSDavid Ahern int err = -ENETUNREACH; 24273abd1adeSDavid Ahern 2428813b3b5dSDavid S. Miller if (fl4->saddr) { 2429b23dd4feSDavid S. Miller rth = ERR_PTR(-EINVAL); 2430813b3b5dSDavid S. Miller if (ipv4_is_multicast(fl4->saddr) || 2431813b3b5dSDavid S. Miller ipv4_is_lbcast(fl4->saddr) || 2432813b3b5dSDavid S. Miller ipv4_is_zeronet(fl4->saddr)) 24331da177e4SLinus Torvalds goto out; 24341da177e4SLinus Torvalds 24351da177e4SLinus Torvalds /* I removed check for oif == dev_out->oif here. 24361da177e4SLinus Torvalds It was wrong for two reasons: 24371ab35276SDenis V. Lunev 1. ip_dev_find(net, saddr) can return wrong iface, if saddr 24381ab35276SDenis V. Lunev is assigned to multiple interfaces. 24391da177e4SLinus Torvalds 2. Moreover, we are allowed to send packets with saddr 24401da177e4SLinus Torvalds of another iface. --ANK 24411da177e4SLinus Torvalds */ 24421da177e4SLinus Torvalds 2443813b3b5dSDavid S. Miller if (fl4->flowi4_oif == 0 && 2444813b3b5dSDavid S. Miller (ipv4_is_multicast(fl4->daddr) || 2445813b3b5dSDavid S. Miller ipv4_is_lbcast(fl4->daddr))) { 2446a210d01aSJulian Anastasov /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2447813b3b5dSDavid S. Miller dev_out = __ip_dev_find(net, fl4->saddr, false); 244851456b29SIan Morris if (!dev_out) 2449a210d01aSJulian Anastasov goto out; 2450a210d01aSJulian Anastasov 24511da177e4SLinus Torvalds /* Special hack: user can direct multicasts 24521da177e4SLinus Torvalds and limited broadcast via necessary interface 24531da177e4SLinus Torvalds without fiddling with IP_MULTICAST_IF or IP_PKTINFO. 24541da177e4SLinus Torvalds This hack is not just for fun, it allows 24551da177e4SLinus Torvalds vic,vat and friends to work. 24561da177e4SLinus Torvalds They bind socket to loopback, set ttl to zero 24571da177e4SLinus Torvalds and expect that it will work. 24581da177e4SLinus Torvalds From the viewpoint of routing cache they are broken, 24591da177e4SLinus Torvalds because we are not allowed to build multicast path 24601da177e4SLinus Torvalds with loopback source addr (look, routing cache 24611da177e4SLinus Torvalds cannot know, that ttl is zero, so that packet 24621da177e4SLinus Torvalds will not leave this host and route is valid). 24631da177e4SLinus Torvalds Luckily, this hack is good workaround. 24641da177e4SLinus Torvalds */ 24651da177e4SLinus Torvalds 2466813b3b5dSDavid S. Miller fl4->flowi4_oif = dev_out->ifindex; 24671da177e4SLinus Torvalds goto make_route; 24681da177e4SLinus Torvalds } 2469a210d01aSJulian Anastasov 2470813b3b5dSDavid S. Miller if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { 2471a210d01aSJulian Anastasov /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2472813b3b5dSDavid S. Miller if (!__ip_dev_find(net, fl4->saddr, false)) 2473a210d01aSJulian Anastasov goto out; 24741da177e4SLinus Torvalds } 2475a210d01aSJulian Anastasov } 24761da177e4SLinus Torvalds 24771da177e4SLinus Torvalds 2478813b3b5dSDavid S. Miller if (fl4->flowi4_oif) { 2479813b3b5dSDavid S. Miller dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); 2480b23dd4feSDavid S. Miller rth = ERR_PTR(-ENODEV); 248151456b29SIan Morris if (!dev_out) 24821da177e4SLinus Torvalds goto out; 2483e5ed6399SHerbert Xu 2484e5ed6399SHerbert Xu /* RACE: Check return value of inet_select_addr instead. */ 2485fc75fc83SEric Dumazet if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { 2486b23dd4feSDavid S. Miller rth = ERR_PTR(-ENETUNREACH); 2487fc75fc83SEric Dumazet goto out; 2488fc75fc83SEric Dumazet } 2489813b3b5dSDavid S. Miller if (ipv4_is_local_multicast(fl4->daddr) || 24906a211654SAndrew Lunn ipv4_is_lbcast(fl4->daddr) || 24916a211654SAndrew Lunn fl4->flowi4_proto == IPPROTO_IGMP) { 2492813b3b5dSDavid S. Miller if (!fl4->saddr) 2493813b3b5dSDavid S. Miller fl4->saddr = inet_select_addr(dev_out, 0, 24941da177e4SLinus Torvalds RT_SCOPE_LINK); 24951da177e4SLinus Torvalds goto make_route; 24961da177e4SLinus Torvalds } 24970a7e2260SJiri Benc if (!fl4->saddr) { 2498813b3b5dSDavid S. Miller if (ipv4_is_multicast(fl4->daddr)) 2499813b3b5dSDavid S. Miller fl4->saddr = inet_select_addr(dev_out, 0, 2500813b3b5dSDavid S. Miller fl4->flowi4_scope); 2501813b3b5dSDavid S. Miller else if (!fl4->daddr) 2502813b3b5dSDavid S. Miller fl4->saddr = inet_select_addr(dev_out, 0, 25031da177e4SLinus Torvalds RT_SCOPE_HOST); 25041da177e4SLinus Torvalds } 2505613d09b3SDavid Ahern } 25061da177e4SLinus Torvalds 2507813b3b5dSDavid S. Miller if (!fl4->daddr) { 2508813b3b5dSDavid S. Miller fl4->daddr = fl4->saddr; 2509813b3b5dSDavid S. Miller if (!fl4->daddr) 2510813b3b5dSDavid S. Miller fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); 2511b40afd0eSDenis V. Lunev dev_out = net->loopback_dev; 25121fb9489bSPavel Emelyanov fl4->flowi4_oif = LOOPBACK_IFINDEX; 25133abd1adeSDavid Ahern res->type = RTN_LOCAL; 25141da177e4SLinus Torvalds flags |= RTCF_LOCAL; 25151da177e4SLinus Torvalds goto make_route; 25161da177e4SLinus Torvalds } 25171da177e4SLinus Torvalds 25183abd1adeSDavid Ahern err = fib_lookup(net, fl4, res, 0); 25190315e382SNikola Forró if (err) { 25203abd1adeSDavid Ahern res->fi = NULL; 25213abd1adeSDavid Ahern res->table = NULL; 25226104e112SDavid Ahern if (fl4->flowi4_oif && 2523e58e4159SDavid Ahern (ipv4_is_multicast(fl4->daddr) || 2524e58e4159SDavid Ahern !netif_index_is_l3_master(net, fl4->flowi4_oif))) { 25251da177e4SLinus Torvalds /* Apparently, routing tables are wrong. Assume, 25261da177e4SLinus Torvalds that the destination is on link. 25271da177e4SLinus Torvalds 25281da177e4SLinus Torvalds WHY? DW. 25291da177e4SLinus Torvalds Because we are allowed to send to iface 25301da177e4SLinus Torvalds even if it has NO routes and NO assigned 25311da177e4SLinus Torvalds addresses. When oif is specified, routing 25321da177e4SLinus Torvalds tables are looked up with only one purpose: 25331da177e4SLinus Torvalds to catch if destination is gatewayed, rather than 25341da177e4SLinus Torvalds direct. Moreover, if MSG_DONTROUTE is set, 25351da177e4SLinus Torvalds we send packet, ignoring both routing tables 25361da177e4SLinus Torvalds and ifaddr state. --ANK 25371da177e4SLinus Torvalds 25381da177e4SLinus Torvalds 25391da177e4SLinus Torvalds We could make it even if oif is unknown, 25401da177e4SLinus Torvalds likely IPv6, but we do not. 25411da177e4SLinus Torvalds */ 25421da177e4SLinus Torvalds 2543813b3b5dSDavid S. Miller if (fl4->saddr == 0) 2544813b3b5dSDavid S. Miller fl4->saddr = inet_select_addr(dev_out, 0, 25451da177e4SLinus Torvalds RT_SCOPE_LINK); 25463abd1adeSDavid Ahern res->type = RTN_UNICAST; 25471da177e4SLinus Torvalds goto make_route; 25481da177e4SLinus Torvalds } 25490315e382SNikola Forró rth = ERR_PTR(err); 25501da177e4SLinus Torvalds goto out; 25511da177e4SLinus Torvalds } 25521da177e4SLinus Torvalds 25533abd1adeSDavid Ahern if (res->type == RTN_LOCAL) { 2554813b3b5dSDavid S. Miller if (!fl4->saddr) { 25553abd1adeSDavid Ahern if (res->fi->fib_prefsrc) 25563abd1adeSDavid Ahern fl4->saddr = res->fi->fib_prefsrc; 25579fc3bbb4SJoel Sing else 2558813b3b5dSDavid S. Miller fl4->saddr = fl4->daddr; 25599fc3bbb4SJoel Sing } 25605f02ce24SDavid Ahern 25615f02ce24SDavid Ahern /* L3 master device is the loopback for that domain */ 25623abd1adeSDavid Ahern dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? : 2563b7c8487cSRobert Shearman net->loopback_dev; 2564839da4d9SDavid Ahern 2565839da4d9SDavid Ahern /* make sure orig_oif points to fib result device even 2566839da4d9SDavid Ahern * though packet rx/tx happens over loopback or l3mdev 2567839da4d9SDavid Ahern */ 2568839da4d9SDavid Ahern orig_oif = FIB_RES_OIF(*res); 2569839da4d9SDavid Ahern 2570813b3b5dSDavid S. Miller fl4->flowi4_oif = dev_out->ifindex; 25711da177e4SLinus Torvalds flags |= RTCF_LOCAL; 25721da177e4SLinus Torvalds goto make_route; 25731da177e4SLinus Torvalds } 25741da177e4SLinus Torvalds 25753abd1adeSDavid Ahern fib_select_path(net, res, fl4, skb); 25761da177e4SLinus Torvalds 25773abd1adeSDavid Ahern dev_out = FIB_RES_DEV(*res); 2578813b3b5dSDavid S. Miller fl4->flowi4_oif = dev_out->ifindex; 25791da177e4SLinus Torvalds 25801da177e4SLinus Torvalds 25811da177e4SLinus Torvalds make_route: 25823abd1adeSDavid Ahern rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); 25831da177e4SLinus Torvalds 2584010c2708SDavid S. Miller out: 2585b23dd4feSDavid S. Miller return rth; 25861da177e4SLinus Torvalds } 2587d8c97a94SArnaldo Carvalho de Melo 2588ae2688d5SJianzhao Wang static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) 2589ae2688d5SJianzhao Wang { 2590ae2688d5SJianzhao Wang return NULL; 2591ae2688d5SJianzhao Wang } 2592ae2688d5SJianzhao Wang 2593ebb762f2SSteffen Klassert static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) 2594ec831ea7SRoland Dreier { 2595618f9bc7SSteffen Klassert unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 2596618f9bc7SSteffen Klassert 2597618f9bc7SSteffen Klassert return mtu ? : dst->dev->mtu; 2598ec831ea7SRoland Dreier } 2599ec831ea7SRoland Dreier 26006700c270SDavid S. Miller static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, 26016700c270SDavid S. Miller struct sk_buff *skb, u32 mtu) 260214e50e57SDavid S. Miller { 260314e50e57SDavid S. Miller } 260414e50e57SDavid S. Miller 26056700c270SDavid S. Miller static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, 26066700c270SDavid S. Miller struct sk_buff *skb) 2607b587ee3bSDavid S. Miller { 2608b587ee3bSDavid S. Miller } 2609b587ee3bSDavid S. Miller 26100972ddb2SHeld Bernhard static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, 26110972ddb2SHeld Bernhard unsigned long old) 26120972ddb2SHeld Bernhard { 26130972ddb2SHeld Bernhard return NULL; 26140972ddb2SHeld Bernhard } 26150972ddb2SHeld Bernhard 261614e50e57SDavid S. Miller static struct dst_ops ipv4_dst_blackhole_ops = { 261714e50e57SDavid S. Miller .family = AF_INET, 2618ae2688d5SJianzhao Wang .check = ipv4_blackhole_dst_check, 2619ebb762f2SSteffen Klassert .mtu = ipv4_blackhole_mtu, 2620214f45c9SEric Dumazet .default_advmss = ipv4_default_advmss, 262114e50e57SDavid S. Miller .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2622b587ee3bSDavid S. Miller .redirect = ipv4_rt_blackhole_redirect, 26230972ddb2SHeld Bernhard .cow_metrics = ipv4_rt_blackhole_cow_metrics, 2624d3aaeb38SDavid S. Miller .neigh_lookup = ipv4_neigh_lookup, 262514e50e57SDavid S. Miller }; 262614e50e57SDavid S. Miller 26272774c131SDavid S. Miller struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) 262814e50e57SDavid S. Miller { 26292774c131SDavid S. Miller struct rtable *ort = (struct rtable *) dst_orig; 2630f5b0a874SDavid S. Miller struct rtable *rt; 263114e50e57SDavid S. Miller 26326c0e7284SSteffen Klassert rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); 263314e50e57SDavid S. Miller if (rt) { 2634d8d1f30bSChangli Gao struct dst_entry *new = &rt->dst; 263514e50e57SDavid S. Miller 263614e50e57SDavid S. Miller new->__use = 1; 2637352e512cSHerbert Xu new->input = dst_discard; 2638ede2059dSEric W. Biederman new->output = dst_discard_out; 263914e50e57SDavid S. Miller 26401dbe3252SWei Wang new->dev = net->loopback_dev; 264114e50e57SDavid S. Miller if (new->dev) 264214e50e57SDavid S. Miller dev_hold(new->dev); 264314e50e57SDavid S. Miller 26449917e1e8SDavid S. Miller rt->rt_is_input = ort->rt_is_input; 26455e2b61f7SDavid S. Miller rt->rt_iif = ort->rt_iif; 26465943634fSDavid S. Miller rt->rt_pmtu = ort->rt_pmtu; 2647d52e5a7eSSabrina Dubroca rt->rt_mtu_locked = ort->rt_mtu_locked; 264814e50e57SDavid S. Miller 2649ca4c3fc2Sfan.du rt->rt_genid = rt_genid_ipv4(net); 265014e50e57SDavid S. Miller rt->rt_flags = ort->rt_flags; 265114e50e57SDavid S. Miller rt->rt_type = ort->rt_type; 26521550c171SDavid Ahern rt->rt_gw_family = ort->rt_gw_family; 26531550c171SDavid Ahern if (rt->rt_gw_family == AF_INET) 26541550c171SDavid Ahern rt->rt_gw4 = ort->rt_gw4; 26550f5f7d7bSDavid Ahern else if (rt->rt_gw_family == AF_INET6) 26560f5f7d7bSDavid Ahern rt->rt_gw6 = ort->rt_gw6; 265714e50e57SDavid S. Miller 2658caacf05eSDavid S. Miller INIT_LIST_HEAD(&rt->rt_uncached); 265914e50e57SDavid S. Miller } 266014e50e57SDavid S. Miller 26612774c131SDavid S. Miller dst_release(dst_orig); 26622774c131SDavid S. Miller 26632774c131SDavid S. Miller return rt ? &rt->dst : ERR_PTR(-ENOMEM); 266414e50e57SDavid S. Miller } 266514e50e57SDavid S. Miller 26669d6ec938SDavid S. Miller struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, 26676f9c9615SEric Dumazet const struct sock *sk) 26681da177e4SLinus Torvalds { 26699d6ec938SDavid S. Miller struct rtable *rt = __ip_route_output_key(net, flp4); 26701da177e4SLinus Torvalds 2671b23dd4feSDavid S. Miller if (IS_ERR(rt)) 2672b23dd4feSDavid S. Miller return rt; 26731da177e4SLinus Torvalds 267456157872SDavid S. Miller if (flp4->flowi4_proto) 2675f92ee619SSteffen Klassert rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, 26769d6ec938SDavid S. Miller flowi4_to_flowi(flp4), 26779d6ec938SDavid S. Miller sk, 0); 26781da177e4SLinus Torvalds 2679b23dd4feSDavid S. Miller return rt; 26801da177e4SLinus Torvalds } 2681d8c97a94SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(ip_route_output_flow); 2682d8c97a94SArnaldo Carvalho de Melo 26833765d35eSDavid Ahern /* called with rcu_read_lock held */ 2684404eb77eSRoopa Prabhu static int rt_fill_info(struct net *net, __be32 dst, __be32 src, 2685404eb77eSRoopa Prabhu struct rtable *rt, u32 table_id, struct flowi4 *fl4, 2686404eb77eSRoopa Prabhu struct sk_buff *skb, u32 portid, u32 seq) 26871da177e4SLinus Torvalds { 26881da177e4SLinus Torvalds struct rtmsg *r; 26891da177e4SLinus Torvalds struct nlmsghdr *nlh; 26902bc8ca40SSteffen Klassert unsigned long expires = 0; 2691f185071dSDavid S. Miller u32 error; 2692521f5490SJulian Anastasov u32 metrics[RTAX_MAX]; 2693be403ea1SThomas Graf 2694d3166e0cSDavid Ahern nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0); 269551456b29SIan Morris if (!nlh) 269626932566SPatrick McHardy return -EMSGSIZE; 2697be403ea1SThomas Graf 2698be403ea1SThomas Graf r = nlmsg_data(nlh); 26991da177e4SLinus Torvalds r->rtm_family = AF_INET; 27001da177e4SLinus Torvalds r->rtm_dst_len = 32; 27011da177e4SLinus Torvalds r->rtm_src_len = 0; 2702*d948974cSStefano Brivio r->rtm_tos = fl4 ? fl4->flowi4_tos : 0; 27038a430ed5SDavid Ahern r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; 2704c36ba660SDavid Ahern if (nla_put_u32(skb, RTA_TABLE, table_id)) 2705f3756b79SDavid S. Miller goto nla_put_failure; 27061da177e4SLinus Torvalds r->rtm_type = rt->rt_type; 27071da177e4SLinus Torvalds r->rtm_scope = RT_SCOPE_UNIVERSE; 27081da177e4SLinus Torvalds r->rtm_protocol = RTPROT_UNSPEC; 27091da177e4SLinus Torvalds r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; 27101da177e4SLinus Torvalds if (rt->rt_flags & RTCF_NOTIFY) 27111da177e4SLinus Torvalds r->rtm_flags |= RTM_F_NOTIFY; 2712df4d9254SHannes Frederic Sowa if (IPCB(skb)->flags & IPSKB_DOREDIRECT) 2713df4d9254SHannes Frederic Sowa r->rtm_flags |= RTCF_DOREDIRECT; 2714be403ea1SThomas Graf 2715930345eaSJiri Benc if (nla_put_in_addr(skb, RTA_DST, dst)) 2716f3756b79SDavid S. Miller goto nla_put_failure; 27171a00fee4SDavid Miller if (src) { 27181da177e4SLinus Torvalds r->rtm_src_len = 32; 2719930345eaSJiri Benc if (nla_put_in_addr(skb, RTA_SRC, src)) 2720f3756b79SDavid S. Miller goto nla_put_failure; 27211da177e4SLinus Torvalds } 2722f3756b79SDavid S. Miller if (rt->dst.dev && 2723f3756b79SDavid S. Miller nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 2724f3756b79SDavid S. Miller goto nla_put_failure; 2725c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 2726f3756b79SDavid S. Miller if (rt->dst.tclassid && 2727f3756b79SDavid S. Miller nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) 2728f3756b79SDavid S. Miller goto nla_put_failure; 27291da177e4SLinus Torvalds #endif 2730*d948974cSStefano Brivio if (fl4 && !rt_is_input_route(rt) && 2731d6c0a4f6SDavid Miller fl4->saddr != src) { 2732930345eaSJiri Benc if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) 2733f3756b79SDavid S. Miller goto nla_put_failure; 2734f3756b79SDavid S. Miller } 27351550c171SDavid Ahern if (rt->rt_gw_family == AF_INET && 27360f5f7d7bSDavid Ahern nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) { 2737f3756b79SDavid S. Miller goto nla_put_failure; 27380f5f7d7bSDavid Ahern } else if (rt->rt_gw_family == AF_INET6) { 27390f5f7d7bSDavid Ahern int alen = sizeof(struct in6_addr); 27400f5f7d7bSDavid Ahern struct nlattr *nla; 27410f5f7d7bSDavid Ahern struct rtvia *via; 27420f5f7d7bSDavid Ahern 27430f5f7d7bSDavid Ahern nla = nla_reserve(skb, RTA_VIA, alen + 2); 27440f5f7d7bSDavid Ahern if (!nla) 27450f5f7d7bSDavid Ahern goto nla_put_failure; 27460f5f7d7bSDavid Ahern 27470f5f7d7bSDavid Ahern via = nla_data(nla); 27480f5f7d7bSDavid Ahern via->rtvia_family = AF_INET6; 27490f5f7d7bSDavid Ahern memcpy(via->rtvia_addr, &rt->rt_gw6, alen); 27500f5f7d7bSDavid Ahern } 2751be403ea1SThomas Graf 2752ee9a8f7aSSteffen Klassert expires = rt->dst.expires; 2753ee9a8f7aSSteffen Klassert if (expires) { 2754ee9a8f7aSSteffen Klassert unsigned long now = jiffies; 2755ee9a8f7aSSteffen Klassert 2756ee9a8f7aSSteffen Klassert if (time_before(now, expires)) 2757ee9a8f7aSSteffen Klassert expires -= now; 2758ee9a8f7aSSteffen Klassert else 2759ee9a8f7aSSteffen Klassert expires = 0; 2760ee9a8f7aSSteffen Klassert } 2761ee9a8f7aSSteffen Klassert 2762521f5490SJulian Anastasov memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); 2763ee9a8f7aSSteffen Klassert if (rt->rt_pmtu && expires) 2764521f5490SJulian Anastasov metrics[RTAX_MTU - 1] = rt->rt_pmtu; 2765d52e5a7eSSabrina Dubroca if (rt->rt_mtu_locked && expires) 2766d52e5a7eSSabrina Dubroca metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); 2767521f5490SJulian Anastasov if (rtnetlink_put_metrics(skb, metrics) < 0) 2768be403ea1SThomas Graf goto nla_put_failure; 2769be403ea1SThomas Graf 2770*d948974cSStefano Brivio if (fl4) { 2771b4869889SDavid Miller if (fl4->flowi4_mark && 277268aaed54Sstephen hemminger nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) 2773f3756b79SDavid S. Miller goto nla_put_failure; 2774963bfeeeSEric Dumazet 2775622ec2c9SLorenzo Colitti if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && 2776622ec2c9SLorenzo Colitti nla_put_u32(skb, RTA_UID, 2777*d948974cSStefano Brivio from_kuid_munged(current_user_ns(), 2778*d948974cSStefano Brivio fl4->flowi4_uid))) 2779622ec2c9SLorenzo Colitti goto nla_put_failure; 2780622ec2c9SLorenzo Colitti 2781c7537967SDavid S. Miller if (rt_is_input_route(rt)) { 27828caaf7b6SNicolas Dichtel #ifdef CONFIG_IP_MROUTE 2783*d948974cSStefano Brivio if (ipv4_is_multicast(dst) && 2784*d948974cSStefano Brivio !ipv4_is_local_multicast(dst) && 27858caaf7b6SNicolas Dichtel IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { 27868caaf7b6SNicolas Dichtel int err = ipmr_get_route(net, skb, 27878caaf7b6SNicolas Dichtel fl4->saddr, fl4->daddr, 27889f09eaeaSDavid Ahern r, portid); 27892cf75070SNikolay Aleksandrov 27908caaf7b6SNicolas Dichtel if (err <= 0) { 27918caaf7b6SNicolas Dichtel if (err == 0) 27928caaf7b6SNicolas Dichtel return 0; 27938caaf7b6SNicolas Dichtel goto nla_put_failure; 27948caaf7b6SNicolas Dichtel } 27958caaf7b6SNicolas Dichtel } else 27968caaf7b6SNicolas Dichtel #endif 2797404eb77eSRoopa Prabhu if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) 2798f3756b79SDavid S. Miller goto nla_put_failure; 27991da177e4SLinus Torvalds } 2800*d948974cSStefano Brivio } 2801*d948974cSStefano Brivio 2802*d948974cSStefano Brivio error = rt->dst.error; 28031da177e4SLinus Torvalds 2804f185071dSDavid S. Miller if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) 2805e3703b3dSThomas Graf goto nla_put_failure; 28061da177e4SLinus Torvalds 2807053c095aSJohannes Berg nlmsg_end(skb, nlh); 2808053c095aSJohannes Berg return 0; 2809be403ea1SThomas Graf 2810be403ea1SThomas Graf nla_put_failure: 281126932566SPatrick McHardy nlmsg_cancel(skb, nlh); 281226932566SPatrick McHardy return -EMSGSIZE; 28131da177e4SLinus Torvalds } 28141da177e4SLinus Torvalds 2815404eb77eSRoopa Prabhu static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, 2816404eb77eSRoopa Prabhu u8 ip_proto, __be16 sport, 2817404eb77eSRoopa Prabhu __be16 dport) 2818404eb77eSRoopa Prabhu { 2819404eb77eSRoopa Prabhu struct sk_buff *skb; 2820404eb77eSRoopa Prabhu struct iphdr *iph; 2821404eb77eSRoopa Prabhu 2822404eb77eSRoopa Prabhu skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2823404eb77eSRoopa Prabhu if (!skb) 2824404eb77eSRoopa Prabhu return NULL; 2825404eb77eSRoopa Prabhu 2826404eb77eSRoopa Prabhu /* Reserve room for dummy headers, this skb can pass 2827404eb77eSRoopa Prabhu * through good chunk of routing engine. 2828404eb77eSRoopa Prabhu */ 2829404eb77eSRoopa Prabhu skb_reset_mac_header(skb); 2830404eb77eSRoopa Prabhu skb_reset_network_header(skb); 2831404eb77eSRoopa Prabhu skb->protocol = htons(ETH_P_IP); 2832404eb77eSRoopa Prabhu iph = skb_put(skb, sizeof(struct iphdr)); 2833404eb77eSRoopa Prabhu iph->protocol = ip_proto; 2834404eb77eSRoopa Prabhu iph->saddr = src; 2835404eb77eSRoopa Prabhu iph->daddr = dst; 2836404eb77eSRoopa Prabhu iph->version = 0x4; 2837404eb77eSRoopa Prabhu iph->frag_off = 0; 2838404eb77eSRoopa Prabhu iph->ihl = 0x5; 2839404eb77eSRoopa Prabhu skb_set_transport_header(skb, skb->len); 2840404eb77eSRoopa Prabhu 2841404eb77eSRoopa Prabhu switch (iph->protocol) { 2842404eb77eSRoopa Prabhu case IPPROTO_UDP: { 2843404eb77eSRoopa Prabhu struct udphdr *udph; 2844404eb77eSRoopa Prabhu 2845404eb77eSRoopa Prabhu udph = skb_put_zero(skb, sizeof(struct udphdr)); 2846404eb77eSRoopa Prabhu udph->source = sport; 2847404eb77eSRoopa Prabhu udph->dest = dport; 2848404eb77eSRoopa Prabhu udph->len = sizeof(struct udphdr); 2849404eb77eSRoopa Prabhu udph->check = 0; 2850404eb77eSRoopa Prabhu break; 2851404eb77eSRoopa Prabhu } 2852404eb77eSRoopa Prabhu case IPPROTO_TCP: { 2853404eb77eSRoopa Prabhu struct tcphdr *tcph; 2854404eb77eSRoopa Prabhu 2855404eb77eSRoopa Prabhu tcph = skb_put_zero(skb, sizeof(struct tcphdr)); 2856404eb77eSRoopa Prabhu tcph->source = sport; 2857404eb77eSRoopa Prabhu tcph->dest = dport; 2858404eb77eSRoopa Prabhu tcph->doff = sizeof(struct tcphdr) / 4; 2859404eb77eSRoopa Prabhu tcph->rst = 1; 2860404eb77eSRoopa Prabhu tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), 2861404eb77eSRoopa Prabhu src, dst, 0); 2862404eb77eSRoopa Prabhu break; 2863404eb77eSRoopa Prabhu } 2864404eb77eSRoopa Prabhu case IPPROTO_ICMP: { 2865404eb77eSRoopa Prabhu struct icmphdr *icmph; 2866404eb77eSRoopa Prabhu 2867404eb77eSRoopa Prabhu icmph = skb_put_zero(skb, sizeof(struct icmphdr)); 2868404eb77eSRoopa Prabhu icmph->type = ICMP_ECHO; 2869404eb77eSRoopa Prabhu icmph->code = 0; 2870404eb77eSRoopa Prabhu } 2871404eb77eSRoopa Prabhu } 2872404eb77eSRoopa Prabhu 2873404eb77eSRoopa Prabhu return skb; 2874404eb77eSRoopa Prabhu } 2875404eb77eSRoopa Prabhu 2876a00302b6SJakub Kicinski static int inet_rtm_valid_getroute_req(struct sk_buff *skb, 2877a00302b6SJakub Kicinski const struct nlmsghdr *nlh, 2878a00302b6SJakub Kicinski struct nlattr **tb, 2879a00302b6SJakub Kicinski struct netlink_ext_ack *extack) 2880a00302b6SJakub Kicinski { 2881a00302b6SJakub Kicinski struct rtmsg *rtm; 2882a00302b6SJakub Kicinski int i, err; 2883a00302b6SJakub Kicinski 2884a00302b6SJakub Kicinski if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { 2885a00302b6SJakub Kicinski NL_SET_ERR_MSG(extack, 2886a00302b6SJakub Kicinski "ipv4: Invalid header for route get request"); 2887a00302b6SJakub Kicinski return -EINVAL; 2888a00302b6SJakub Kicinski } 2889a00302b6SJakub Kicinski 2890a00302b6SJakub Kicinski if (!netlink_strict_get_check(skb)) 28918cb08174SJohannes Berg return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, 2892a00302b6SJakub Kicinski rtm_ipv4_policy, extack); 2893a00302b6SJakub Kicinski 2894a00302b6SJakub Kicinski rtm = nlmsg_data(nlh); 2895a00302b6SJakub Kicinski if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || 2896a00302b6SJakub Kicinski (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || 2897a00302b6SJakub Kicinski rtm->rtm_table || rtm->rtm_protocol || 2898a00302b6SJakub Kicinski rtm->rtm_scope || rtm->rtm_type) { 2899a00302b6SJakub Kicinski NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request"); 2900a00302b6SJakub Kicinski return -EINVAL; 2901a00302b6SJakub Kicinski } 2902a00302b6SJakub Kicinski 2903a00302b6SJakub Kicinski if (rtm->rtm_flags & ~(RTM_F_NOTIFY | 2904a00302b6SJakub Kicinski RTM_F_LOOKUP_TABLE | 2905a00302b6SJakub Kicinski RTM_F_FIB_MATCH)) { 2906a00302b6SJakub Kicinski NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request"); 2907a00302b6SJakub Kicinski return -EINVAL; 2908a00302b6SJakub Kicinski } 2909a00302b6SJakub Kicinski 29108cb08174SJohannes Berg err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 2911a00302b6SJakub Kicinski rtm_ipv4_policy, extack); 2912a00302b6SJakub Kicinski if (err) 2913a00302b6SJakub Kicinski return err; 2914a00302b6SJakub Kicinski 2915a00302b6SJakub Kicinski if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 2916a00302b6SJakub Kicinski (tb[RTA_DST] && !rtm->rtm_dst_len)) { 2917a00302b6SJakub Kicinski NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); 2918a00302b6SJakub Kicinski return -EINVAL; 2919a00302b6SJakub Kicinski } 2920a00302b6SJakub Kicinski 2921a00302b6SJakub Kicinski for (i = 0; i <= RTA_MAX; i++) { 2922a00302b6SJakub Kicinski if (!tb[i]) 2923a00302b6SJakub Kicinski continue; 2924a00302b6SJakub Kicinski 2925a00302b6SJakub Kicinski switch (i) { 2926a00302b6SJakub Kicinski case RTA_IIF: 2927a00302b6SJakub Kicinski case RTA_OIF: 2928a00302b6SJakub Kicinski case RTA_SRC: 2929a00302b6SJakub Kicinski case RTA_DST: 2930a00302b6SJakub Kicinski case RTA_IP_PROTO: 2931a00302b6SJakub Kicinski case RTA_SPORT: 2932a00302b6SJakub Kicinski case RTA_DPORT: 2933a00302b6SJakub Kicinski case RTA_MARK: 2934a00302b6SJakub Kicinski case RTA_UID: 2935a00302b6SJakub Kicinski break; 2936a00302b6SJakub Kicinski default: 2937a00302b6SJakub Kicinski NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request"); 2938a00302b6SJakub Kicinski return -EINVAL; 2939a00302b6SJakub Kicinski } 2940a00302b6SJakub Kicinski } 2941a00302b6SJakub Kicinski 2942a00302b6SJakub Kicinski return 0; 2943a00302b6SJakub Kicinski } 2944a00302b6SJakub Kicinski 2945c21ef3e3SDavid Ahern static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 2946c21ef3e3SDavid Ahern struct netlink_ext_ack *extack) 29471da177e4SLinus Torvalds { 29483b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(in_skb->sk); 2949d889ce3bSThomas Graf struct nlattr *tb[RTA_MAX+1]; 2950404eb77eSRoopa Prabhu u32 table_id = RT_TABLE_MAIN; 2951404eb77eSRoopa Prabhu __be16 sport = 0, dport = 0; 29523765d35eSDavid Ahern struct fib_result res = {}; 2953404eb77eSRoopa Prabhu u8 ip_proto = IPPROTO_UDP; 29541da177e4SLinus Torvalds struct rtable *rt = NULL; 2955404eb77eSRoopa Prabhu struct sk_buff *skb; 2956404eb77eSRoopa Prabhu struct rtmsg *rtm; 2957e8e3fbe9SMaciej Żenczykowski struct flowi4 fl4 = {}; 29589e12bb22SAl Viro __be32 dst = 0; 29599e12bb22SAl Viro __be32 src = 0; 2960404eb77eSRoopa Prabhu kuid_t uid; 29619e12bb22SAl Viro u32 iif; 2962d889ce3bSThomas Graf int err; 2963963bfeeeSEric Dumazet int mark; 29641da177e4SLinus Torvalds 2965a00302b6SJakub Kicinski err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 2966d889ce3bSThomas Graf if (err < 0) 2967404eb77eSRoopa Prabhu return err; 2968d889ce3bSThomas Graf 2969d889ce3bSThomas Graf rtm = nlmsg_data(nlh); 297067b61f6cSJiri Benc src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; 297167b61f6cSJiri Benc dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; 2972d889ce3bSThomas Graf iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; 2973963bfeeeSEric Dumazet mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; 2974622ec2c9SLorenzo Colitti if (tb[RTA_UID]) 2975622ec2c9SLorenzo Colitti uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); 2976622ec2c9SLorenzo Colitti else 2977622ec2c9SLorenzo Colitti uid = (iif ? INVALID_UID : current_uid()); 29781da177e4SLinus Torvalds 2979404eb77eSRoopa Prabhu if (tb[RTA_IP_PROTO]) { 2980404eb77eSRoopa Prabhu err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], 29815e1a99eaSHangbin Liu &ip_proto, AF_INET, extack); 2982404eb77eSRoopa Prabhu if (err) 2983404eb77eSRoopa Prabhu return err; 2984404eb77eSRoopa Prabhu } 2985bbadb9a2SFlorian Larysch 2986404eb77eSRoopa Prabhu if (tb[RTA_SPORT]) 2987404eb77eSRoopa Prabhu sport = nla_get_be16(tb[RTA_SPORT]); 2988404eb77eSRoopa Prabhu 2989404eb77eSRoopa Prabhu if (tb[RTA_DPORT]) 2990404eb77eSRoopa Prabhu dport = nla_get_be16(tb[RTA_DPORT]); 2991404eb77eSRoopa Prabhu 2992404eb77eSRoopa Prabhu skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport); 2993404eb77eSRoopa Prabhu if (!skb) 2994404eb77eSRoopa Prabhu return -ENOBUFS; 2995bbadb9a2SFlorian Larysch 2996d6c0a4f6SDavid Miller fl4.daddr = dst; 2997d6c0a4f6SDavid Miller fl4.saddr = src; 2998d6c0a4f6SDavid Miller fl4.flowi4_tos = rtm->rtm_tos; 2999d6c0a4f6SDavid Miller fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; 3000d6c0a4f6SDavid Miller fl4.flowi4_mark = mark; 3001622ec2c9SLorenzo Colitti fl4.flowi4_uid = uid; 3002404eb77eSRoopa Prabhu if (sport) 3003404eb77eSRoopa Prabhu fl4.fl4_sport = sport; 3004404eb77eSRoopa Prabhu if (dport) 3005404eb77eSRoopa Prabhu fl4.fl4_dport = dport; 3006404eb77eSRoopa Prabhu fl4.flowi4_proto = ip_proto; 3007d6c0a4f6SDavid Miller 30083765d35eSDavid Ahern rcu_read_lock(); 30093765d35eSDavid Ahern 30101da177e4SLinus Torvalds if (iif) { 3011d889ce3bSThomas Graf struct net_device *dev; 3012d889ce3bSThomas Graf 30133765d35eSDavid Ahern dev = dev_get_by_index_rcu(net, iif); 301451456b29SIan Morris if (!dev) { 30151da177e4SLinus Torvalds err = -ENODEV; 3016404eb77eSRoopa Prabhu goto errout_rcu; 3017d889ce3bSThomas Graf } 3018d889ce3bSThomas Graf 3019404eb77eSRoopa Prabhu fl4.flowi4_iif = iif; /* for rt_fill_info */ 30201da177e4SLinus Torvalds skb->dev = dev; 3021963bfeeeSEric Dumazet skb->mark = mark; 30223765d35eSDavid Ahern err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos, 30233765d35eSDavid Ahern dev, &res); 3024d889ce3bSThomas Graf 3025511c3f92SEric Dumazet rt = skb_rtable(skb); 3026d8d1f30bSChangli Gao if (err == 0 && rt->dst.error) 3027d8d1f30bSChangli Gao err = -rt->dst.error; 30281da177e4SLinus Torvalds } else { 30296503a304SLorenzo Colitti fl4.flowi4_iif = LOOPBACK_IFINDEX; 303021f94775SIdo Schimmel skb->dev = net->loopback_dev; 30313765d35eSDavid Ahern rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); 3032b23dd4feSDavid S. Miller err = 0; 3033b23dd4feSDavid S. Miller if (IS_ERR(rt)) 3034b23dd4feSDavid S. Miller err = PTR_ERR(rt); 30352c87d63aSFlorian Westphal else 30362c87d63aSFlorian Westphal skb_dst_set(skb, &rt->dst); 30371da177e4SLinus Torvalds } 3038d889ce3bSThomas Graf 30391da177e4SLinus Torvalds if (err) 3040404eb77eSRoopa Prabhu goto errout_rcu; 30411da177e4SLinus Torvalds 30421da177e4SLinus Torvalds if (rtm->rtm_flags & RTM_F_NOTIFY) 30431da177e4SLinus Torvalds rt->rt_flags |= RTCF_NOTIFY; 30441da177e4SLinus Torvalds 3045c36ba660SDavid Ahern if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) 304668e813aaSDavid Ahern table_id = res.table ? res.table->tb_id : 0; 3047c36ba660SDavid Ahern 3048404eb77eSRoopa Prabhu /* reset skb for netlink reply msg */ 3049404eb77eSRoopa Prabhu skb_trim(skb, 0); 3050404eb77eSRoopa Prabhu skb_reset_network_header(skb); 3051404eb77eSRoopa Prabhu skb_reset_transport_header(skb); 3052404eb77eSRoopa Prabhu skb_reset_mac_header(skb); 3053404eb77eSRoopa Prabhu 3054bc3aae2bSRoopa Prabhu if (rtm->rtm_flags & RTM_F_FIB_MATCH) { 3055bc3aae2bSRoopa Prabhu if (!res.fi) { 3056bc3aae2bSRoopa Prabhu err = fib_props[res.type].error; 3057bc3aae2bSRoopa Prabhu if (!err) 3058bc3aae2bSRoopa Prabhu err = -EHOSTUNREACH; 3059404eb77eSRoopa Prabhu goto errout_rcu; 3060bc3aae2bSRoopa Prabhu } 3061b6179813SRoopa Prabhu err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, 3062b6179813SRoopa Prabhu nlh->nlmsg_seq, RTM_NEWROUTE, table_id, 3063b6179813SRoopa Prabhu rt->rt_type, res.prefix, res.prefixlen, 3064b6179813SRoopa Prabhu fl4.flowi4_tos, res.fi, 0); 3065bc3aae2bSRoopa Prabhu } else { 3066404eb77eSRoopa Prabhu err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb, 3067ba52d61eSRoopa Prabhu NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); 3068bc3aae2bSRoopa Prabhu } 30697b46a644SDavid S. Miller if (err < 0) 3070404eb77eSRoopa Prabhu goto errout_rcu; 30711da177e4SLinus Torvalds 30723765d35eSDavid Ahern rcu_read_unlock(); 30733765d35eSDavid Ahern 307415e47304SEric W. Biederman err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 30751da177e4SLinus Torvalds 3076d889ce3bSThomas Graf errout_free: 3077404eb77eSRoopa Prabhu return err; 3078404eb77eSRoopa Prabhu errout_rcu: 30793765d35eSDavid Ahern rcu_read_unlock(); 30801da177e4SLinus Torvalds kfree_skb(skb); 3081404eb77eSRoopa Prabhu goto errout_free; 30821da177e4SLinus Torvalds } 30831da177e4SLinus Torvalds 30841da177e4SLinus Torvalds void ip_rt_multicast_event(struct in_device *in_dev) 30851da177e4SLinus Torvalds { 30864ccfe6d4SNicolas Dichtel rt_cache_flush(dev_net(in_dev->dev)); 30871da177e4SLinus Torvalds } 30881da177e4SLinus Torvalds 30891da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL 3090082c7ca4SGao feng static int ip_rt_gc_interval __read_mostly = 60 * HZ; 3091082c7ca4SGao feng static int ip_rt_gc_min_interval __read_mostly = HZ / 2; 3092082c7ca4SGao feng static int ip_rt_gc_elasticity __read_mostly = 8; 3093773daa3cSArnd Bergmann static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; 3094082c7ca4SGao feng 3095fe2c6338SJoe Perches static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, 30968d65af78SAlexey Dobriyan void __user *buffer, 30971da177e4SLinus Torvalds size_t *lenp, loff_t *ppos) 30981da177e4SLinus Torvalds { 30995aad1de5STimo Teräs struct net *net = (struct net *)__ctl->extra1; 31005aad1de5STimo Teräs 31011da177e4SLinus Torvalds if (write) { 31025aad1de5STimo Teräs rt_cache_flush(net); 31035aad1de5STimo Teräs fnhe_genid_bump(net); 31041da177e4SLinus Torvalds return 0; 31051da177e4SLinus Torvalds } 31061da177e4SLinus Torvalds 31071da177e4SLinus Torvalds return -EINVAL; 31081da177e4SLinus Torvalds } 31091da177e4SLinus Torvalds 3110fe2c6338SJoe Perches static struct ctl_table ipv4_route_table[] = { 31111da177e4SLinus Torvalds { 31121da177e4SLinus Torvalds .procname = "gc_thresh", 31131da177e4SLinus Torvalds .data = &ipv4_dst_ops.gc_thresh, 31141da177e4SLinus Torvalds .maxlen = sizeof(int), 31151da177e4SLinus Torvalds .mode = 0644, 31166d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 31171da177e4SLinus Torvalds }, 31181da177e4SLinus Torvalds { 31191da177e4SLinus Torvalds .procname = "max_size", 31201da177e4SLinus Torvalds .data = &ip_rt_max_size, 31211da177e4SLinus Torvalds .maxlen = sizeof(int), 31221da177e4SLinus Torvalds .mode = 0644, 31236d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 31241da177e4SLinus Torvalds }, 31251da177e4SLinus Torvalds { 31261da177e4SLinus Torvalds /* Deprecated. Use gc_min_interval_ms */ 31271da177e4SLinus Torvalds 31281da177e4SLinus Torvalds .procname = "gc_min_interval", 31291da177e4SLinus Torvalds .data = &ip_rt_gc_min_interval, 31301da177e4SLinus Torvalds .maxlen = sizeof(int), 31311da177e4SLinus Torvalds .mode = 0644, 31326d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_jiffies, 31331da177e4SLinus Torvalds }, 31341da177e4SLinus Torvalds { 31351da177e4SLinus Torvalds .procname = "gc_min_interval_ms", 31361da177e4SLinus Torvalds .data = &ip_rt_gc_min_interval, 31371da177e4SLinus Torvalds .maxlen = sizeof(int), 31381da177e4SLinus Torvalds .mode = 0644, 31396d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_ms_jiffies, 31401da177e4SLinus Torvalds }, 31411da177e4SLinus Torvalds { 31421da177e4SLinus Torvalds .procname = "gc_timeout", 31431da177e4SLinus Torvalds .data = &ip_rt_gc_timeout, 31441da177e4SLinus Torvalds .maxlen = sizeof(int), 31451da177e4SLinus Torvalds .mode = 0644, 31466d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_jiffies, 31471da177e4SLinus Torvalds }, 31481da177e4SLinus Torvalds { 31499f28a2fcSEric Dumazet .procname = "gc_interval", 31509f28a2fcSEric Dumazet .data = &ip_rt_gc_interval, 31519f28a2fcSEric Dumazet .maxlen = sizeof(int), 31529f28a2fcSEric Dumazet .mode = 0644, 31539f28a2fcSEric Dumazet .proc_handler = proc_dointvec_jiffies, 31549f28a2fcSEric Dumazet }, 31559f28a2fcSEric Dumazet { 31561da177e4SLinus Torvalds .procname = "redirect_load", 31571da177e4SLinus Torvalds .data = &ip_rt_redirect_load, 31581da177e4SLinus Torvalds .maxlen = sizeof(int), 31591da177e4SLinus Torvalds .mode = 0644, 31606d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 31611da177e4SLinus Torvalds }, 31621da177e4SLinus Torvalds { 31631da177e4SLinus Torvalds .procname = "redirect_number", 31641da177e4SLinus Torvalds .data = &ip_rt_redirect_number, 31651da177e4SLinus Torvalds .maxlen = sizeof(int), 31661da177e4SLinus Torvalds .mode = 0644, 31676d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 31681da177e4SLinus Torvalds }, 31691da177e4SLinus Torvalds { 31701da177e4SLinus Torvalds .procname = "redirect_silence", 31711da177e4SLinus Torvalds .data = &ip_rt_redirect_silence, 31721da177e4SLinus Torvalds .maxlen = sizeof(int), 31731da177e4SLinus Torvalds .mode = 0644, 31746d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 31751da177e4SLinus Torvalds }, 31761da177e4SLinus Torvalds { 31771da177e4SLinus Torvalds .procname = "error_cost", 31781da177e4SLinus Torvalds .data = &ip_rt_error_cost, 31791da177e4SLinus Torvalds .maxlen = sizeof(int), 31801da177e4SLinus Torvalds .mode = 0644, 31816d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 31821da177e4SLinus Torvalds }, 31831da177e4SLinus Torvalds { 31841da177e4SLinus Torvalds .procname = "error_burst", 31851da177e4SLinus Torvalds .data = &ip_rt_error_burst, 31861da177e4SLinus Torvalds .maxlen = sizeof(int), 31871da177e4SLinus Torvalds .mode = 0644, 31886d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 31891da177e4SLinus Torvalds }, 31901da177e4SLinus Torvalds { 31911da177e4SLinus Torvalds .procname = "gc_elasticity", 31921da177e4SLinus Torvalds .data = &ip_rt_gc_elasticity, 31931da177e4SLinus Torvalds .maxlen = sizeof(int), 31941da177e4SLinus Torvalds .mode = 0644, 31956d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 31961da177e4SLinus Torvalds }, 31971da177e4SLinus Torvalds { 31981da177e4SLinus Torvalds .procname = "mtu_expires", 31991da177e4SLinus Torvalds .data = &ip_rt_mtu_expires, 32001da177e4SLinus Torvalds .maxlen = sizeof(int), 32011da177e4SLinus Torvalds .mode = 0644, 32026d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_jiffies, 32031da177e4SLinus Torvalds }, 32041da177e4SLinus Torvalds { 32051da177e4SLinus Torvalds .procname = "min_pmtu", 32061da177e4SLinus Torvalds .data = &ip_rt_min_pmtu, 32071da177e4SLinus Torvalds .maxlen = sizeof(int), 32081da177e4SLinus Torvalds .mode = 0644, 3209c7272c2fSSabrina Dubroca .proc_handler = proc_dointvec_minmax, 3210c7272c2fSSabrina Dubroca .extra1 = &ip_min_valid_pmtu, 32111da177e4SLinus Torvalds }, 32121da177e4SLinus Torvalds { 32131da177e4SLinus Torvalds .procname = "min_adv_mss", 32141da177e4SLinus Torvalds .data = &ip_rt_min_advmss, 32151da177e4SLinus Torvalds .maxlen = sizeof(int), 32161da177e4SLinus Torvalds .mode = 0644, 32176d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 32181da177e4SLinus Torvalds }, 3219f8572d8fSEric W. Biederman { } 32201da177e4SLinus Torvalds }; 322139a23e75SDenis V. Lunev 322239a23e75SDenis V. Lunev static struct ctl_table ipv4_route_flush_table[] = { 322339a23e75SDenis V. Lunev { 322439a23e75SDenis V. Lunev .procname = "flush", 322539a23e75SDenis V. Lunev .maxlen = sizeof(int), 322639a23e75SDenis V. Lunev .mode = 0200, 32276d9f239aSAlexey Dobriyan .proc_handler = ipv4_sysctl_rtcache_flush, 322839a23e75SDenis V. Lunev }, 3229f8572d8fSEric W. Biederman { }, 323039a23e75SDenis V. Lunev }; 323139a23e75SDenis V. Lunev 323239a23e75SDenis V. Lunev static __net_init int sysctl_route_net_init(struct net *net) 323339a23e75SDenis V. Lunev { 323439a23e75SDenis V. Lunev struct ctl_table *tbl; 323539a23e75SDenis V. Lunev 323639a23e75SDenis V. Lunev tbl = ipv4_route_flush_table; 323709ad9bc7SOctavian Purdila if (!net_eq(net, &init_net)) { 323839a23e75SDenis V. Lunev tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); 323951456b29SIan Morris if (!tbl) 324039a23e75SDenis V. Lunev goto err_dup; 3241464dc801SEric W. Biederman 3242464dc801SEric W. Biederman /* Don't export sysctls to unprivileged users */ 3243464dc801SEric W. Biederman if (net->user_ns != &init_user_ns) 3244464dc801SEric W. Biederman tbl[0].procname = NULL; 324539a23e75SDenis V. Lunev } 324639a23e75SDenis V. Lunev tbl[0].extra1 = net; 324739a23e75SDenis V. Lunev 3248ec8f23ceSEric W. Biederman net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl); 324951456b29SIan Morris if (!net->ipv4.route_hdr) 325039a23e75SDenis V. Lunev goto err_reg; 325139a23e75SDenis V. Lunev return 0; 325239a23e75SDenis V. Lunev 325339a23e75SDenis V. Lunev err_reg: 325439a23e75SDenis V. Lunev if (tbl != ipv4_route_flush_table) 325539a23e75SDenis V. Lunev kfree(tbl); 325639a23e75SDenis V. Lunev err_dup: 325739a23e75SDenis V. Lunev return -ENOMEM; 325839a23e75SDenis V. Lunev } 325939a23e75SDenis V. Lunev 326039a23e75SDenis V. Lunev static __net_exit void sysctl_route_net_exit(struct net *net) 326139a23e75SDenis V. Lunev { 326239a23e75SDenis V. Lunev struct ctl_table *tbl; 326339a23e75SDenis V. Lunev 326439a23e75SDenis V. Lunev tbl = net->ipv4.route_hdr->ctl_table_arg; 326539a23e75SDenis V. Lunev unregister_net_sysctl_table(net->ipv4.route_hdr); 326639a23e75SDenis V. Lunev BUG_ON(tbl == ipv4_route_flush_table); 326739a23e75SDenis V. Lunev kfree(tbl); 326839a23e75SDenis V. Lunev } 326939a23e75SDenis V. Lunev 327039a23e75SDenis V. Lunev static __net_initdata struct pernet_operations sysctl_route_ops = { 327139a23e75SDenis V. Lunev .init = sysctl_route_net_init, 327239a23e75SDenis V. Lunev .exit = sysctl_route_net_exit, 327339a23e75SDenis V. Lunev }; 32741da177e4SLinus Torvalds #endif 32751da177e4SLinus Torvalds 32763ee94372SNeil Horman static __net_init int rt_genid_init(struct net *net) 32779f5e97e5SDenis V. Lunev { 3278ca4c3fc2Sfan.du atomic_set(&net->ipv4.rt_genid, 0); 32795aad1de5STimo Teräs atomic_set(&net->fnhe_genid, 0); 32807aed9f72SJason A. Donenfeld atomic_set(&net->ipv4.dev_addr_genid, get_random_int()); 32819f5e97e5SDenis V. Lunev return 0; 32829f5e97e5SDenis V. Lunev } 32839f5e97e5SDenis V. Lunev 32843ee94372SNeil Horman static __net_initdata struct pernet_operations rt_genid_ops = { 32853ee94372SNeil Horman .init = rt_genid_init, 32869f5e97e5SDenis V. Lunev }; 32879f5e97e5SDenis V. Lunev 3288c3426b47SDavid S. Miller static int __net_init ipv4_inetpeer_init(struct net *net) 3289c3426b47SDavid S. Miller { 3290c3426b47SDavid S. Miller struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 3291c3426b47SDavid S. Miller 3292c3426b47SDavid S. Miller if (!bp) 3293c3426b47SDavid S. Miller return -ENOMEM; 3294c3426b47SDavid S. Miller inet_peer_base_init(bp); 3295c3426b47SDavid S. Miller net->ipv4.peers = bp; 3296c3426b47SDavid S. Miller return 0; 3297c3426b47SDavid S. Miller } 3298c3426b47SDavid S. Miller 3299c3426b47SDavid S. Miller static void __net_exit ipv4_inetpeer_exit(struct net *net) 3300c3426b47SDavid S. Miller { 3301c3426b47SDavid S. Miller struct inet_peer_base *bp = net->ipv4.peers; 3302c3426b47SDavid S. Miller 3303c3426b47SDavid S. Miller net->ipv4.peers = NULL; 330456a6b248SDavid S. Miller inetpeer_invalidate_tree(bp); 3305c3426b47SDavid S. Miller kfree(bp); 3306c3426b47SDavid S. Miller } 3307c3426b47SDavid S. Miller 3308c3426b47SDavid S. Miller static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { 3309c3426b47SDavid S. Miller .init = ipv4_inetpeer_init, 3310c3426b47SDavid S. Miller .exit = ipv4_inetpeer_exit, 3311c3426b47SDavid S. Miller }; 33129f5e97e5SDenis V. Lunev 3313c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 33147d720c3eSTejun Heo struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; 3315c7066f70SPatrick McHardy #endif /* CONFIG_IP_ROUTE_CLASSID */ 33161da177e4SLinus Torvalds 33171da177e4SLinus Torvalds int __init ip_rt_init(void) 33181da177e4SLinus Torvalds { 33195055c371SEric Dumazet int cpu; 33201da177e4SLinus Torvalds 33216da2ec56SKees Cook ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents), 33226da2ec56SKees Cook GFP_KERNEL); 332373f156a6SEric Dumazet if (!ip_idents) 332473f156a6SEric Dumazet panic("IP: failed to allocate ip_idents\n"); 332573f156a6SEric Dumazet 332673f156a6SEric Dumazet prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); 332773f156a6SEric Dumazet 3328355b590cSEric Dumazet ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL); 3329355b590cSEric Dumazet if (!ip_tstamps) 3330355b590cSEric Dumazet panic("IP: failed to allocate ip_tstamps\n"); 3331355b590cSEric Dumazet 33325055c371SEric Dumazet for_each_possible_cpu(cpu) { 33335055c371SEric Dumazet struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); 33345055c371SEric Dumazet 33355055c371SEric Dumazet INIT_LIST_HEAD(&ul->head); 33365055c371SEric Dumazet spin_lock_init(&ul->lock); 33375055c371SEric Dumazet } 3338c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID 33390dcec8c2SIngo Molnar ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); 33401da177e4SLinus Torvalds if (!ip_rt_acct) 33411da177e4SLinus Torvalds panic("IP: failed to allocate ip_rt_acct\n"); 33421da177e4SLinus Torvalds #endif 33431da177e4SLinus Torvalds 3344e5d679f3SAlexey Dobriyan ipv4_dst_ops.kmem_cachep = 3345e5d679f3SAlexey Dobriyan kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, 334620c2df83SPaul Mundt SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 33471da177e4SLinus Torvalds 334814e50e57SDavid S. Miller ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; 334914e50e57SDavid S. Miller 3350fc66f95cSEric Dumazet if (dst_entries_init(&ipv4_dst_ops) < 0) 3351fc66f95cSEric Dumazet panic("IP: failed to allocate ipv4_dst_ops counter\n"); 3352fc66f95cSEric Dumazet 3353fc66f95cSEric Dumazet if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) 3354fc66f95cSEric Dumazet panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); 3355fc66f95cSEric Dumazet 335689aef892SDavid S. Miller ipv4_dst_ops.gc_thresh = ~0; 335789aef892SDavid S. Miller ip_rt_max_size = INT_MAX; 33581da177e4SLinus Torvalds 33591da177e4SLinus Torvalds devinet_init(); 33601da177e4SLinus Torvalds ip_fib_init(); 33611da177e4SLinus Torvalds 336273b38711SDenis V. Lunev if (ip_rt_proc_init()) 3363058bd4d2SJoe Perches pr_err("Unable to create route proc files\n"); 33641da177e4SLinus Torvalds #ifdef CONFIG_XFRM 33651da177e4SLinus Torvalds xfrm_init(); 3366703fb94eSSteffen Klassert xfrm4_init(); 33671da177e4SLinus Torvalds #endif 3368394f51abSFlorian Westphal rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, 3369394f51abSFlorian Westphal RTNL_FLAG_DOIT_UNLOCKED); 337063f3444fSThomas Graf 337139a23e75SDenis V. Lunev #ifdef CONFIG_SYSCTL 337239a23e75SDenis V. Lunev register_pernet_subsys(&sysctl_route_ops); 337339a23e75SDenis V. Lunev #endif 33743ee94372SNeil Horman register_pernet_subsys(&rt_genid_ops); 3375c3426b47SDavid S. Miller register_pernet_subsys(&ipv4_inetpeer_ops); 33761bcdca3fSTim Hansen return 0; 33771da177e4SLinus Torvalds } 33781da177e4SLinus Torvalds 3379a1bc6eb4SAl Viro #ifdef CONFIG_SYSCTL 3380eeb61f71SAl Viro /* 3381eeb61f71SAl Viro * We really need to sanitize the damn ipv4 init order, then all 3382eeb61f71SAl Viro * this nonsense will go away. 3383eeb61f71SAl Viro */ 3384eeb61f71SAl Viro void __init ip_static_sysctl_init(void) 3385eeb61f71SAl Viro { 33864e5ca785SEric W. Biederman register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table); 3387eeb61f71SAl Viro } 3388a1bc6eb4SAl Viro #endif 3389