xref: /linux/net/ipv4/route.c (revision d948974ccc6613b30636014f76700de3aad7e9b7)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
41da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
51da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  *		ROUTE - implementation of the IP router.
81da177e4SLinus Torvalds  *
902c30a84SJesper Juhl  * Authors:	Ross Biro
101da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
111da177e4SLinus Torvalds  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
121da177e4SLinus Torvalds  *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
131da177e4SLinus Torvalds  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
141da177e4SLinus Torvalds  *
151da177e4SLinus Torvalds  * Fixes:
161da177e4SLinus Torvalds  *		Alan Cox	:	Verify area fixes.
171da177e4SLinus Torvalds  *		Alan Cox	:	cli() protects routing changes
181da177e4SLinus Torvalds  *		Rui Oliveira	:	ICMP routing table updates
191da177e4SLinus Torvalds  *		(rco@di.uminho.pt)	Routing table insertion and update
201da177e4SLinus Torvalds  *		Linus Torvalds	:	Rewrote bits to be sensible
211da177e4SLinus Torvalds  *		Alan Cox	:	Added BSD route gw semantics
221da177e4SLinus Torvalds  *		Alan Cox	:	Super /proc >4K
231da177e4SLinus Torvalds  *		Alan Cox	:	MTU in route table
241da177e4SLinus Torvalds  *		Alan Cox	: 	MSS actually. Also added the window
251da177e4SLinus Torvalds  *					clamper.
261da177e4SLinus Torvalds  *		Sam Lantinga	:	Fixed route matching in rt_del()
271da177e4SLinus Torvalds  *		Alan Cox	:	Routing cache support.
281da177e4SLinus Torvalds  *		Alan Cox	:	Removed compatibility cruft.
291da177e4SLinus Torvalds  *		Alan Cox	:	RTF_REJECT support.
301da177e4SLinus Torvalds  *		Alan Cox	:	TCP irtt support.
311da177e4SLinus Torvalds  *		Jonathan Naylor	:	Added Metric support.
321da177e4SLinus Torvalds  *	Miquel van Smoorenburg	:	BSD API fixes.
331da177e4SLinus Torvalds  *	Miquel van Smoorenburg	:	Metrics.
341da177e4SLinus Torvalds  *		Alan Cox	:	Use __u32 properly
351da177e4SLinus Torvalds  *		Alan Cox	:	Aligned routing errors more closely with BSD
361da177e4SLinus Torvalds  *					our system is still very different.
371da177e4SLinus Torvalds  *		Alan Cox	:	Faster /proc handling
381da177e4SLinus Torvalds  *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
391da177e4SLinus Torvalds  *					routing caches and better behaviour.
401da177e4SLinus Torvalds  *
411da177e4SLinus Torvalds  *		Olaf Erb	:	irtt wasn't being copied right.
421da177e4SLinus Torvalds  *		Bjorn Ekwall	:	Kerneld route support.
431da177e4SLinus Torvalds  *		Alan Cox	:	Multicast fixed (I hope)
441da177e4SLinus Torvalds  * 		Pavel Krauz	:	Limited broadcast fixed
451da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
461da177e4SLinus Torvalds  *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
471da177e4SLinus Torvalds  *					route.c and rewritten from scratch.
481da177e4SLinus Torvalds  *		Andi Kleen	:	Load-limit warning messages.
491da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
501da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
511da177e4SLinus Torvalds  *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
521da177e4SLinus Torvalds  *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
531da177e4SLinus Torvalds  *		Marc Boucher	:	routing by fwmark
541da177e4SLinus Torvalds  *	Robert Olsson		:	Added rt_cache statistics
551da177e4SLinus Torvalds  *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
56bb1d23b0SEric Dumazet  *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
57cef2685eSIlia Sotnikov  * 	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
58cef2685eSIlia Sotnikov  * 	Ilia Sotnikov		:	Removed TOS from hash calculations
591da177e4SLinus Torvalds  */
601da177e4SLinus Torvalds 
61afd46503SJoe Perches #define pr_fmt(fmt) "IPv4: " fmt
62afd46503SJoe Perches 
631da177e4SLinus Torvalds #include <linux/module.h>
647c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
651da177e4SLinus Torvalds #include <linux/bitops.h>
661da177e4SLinus Torvalds #include <linux/types.h>
671da177e4SLinus Torvalds #include <linux/kernel.h>
681da177e4SLinus Torvalds #include <linux/mm.h>
691da177e4SLinus Torvalds #include <linux/string.h>
701da177e4SLinus Torvalds #include <linux/socket.h>
711da177e4SLinus Torvalds #include <linux/sockios.h>
721da177e4SLinus Torvalds #include <linux/errno.h>
731da177e4SLinus Torvalds #include <linux/in.h>
741da177e4SLinus Torvalds #include <linux/inet.h>
751da177e4SLinus Torvalds #include <linux/netdevice.h>
761da177e4SLinus Torvalds #include <linux/proc_fs.h>
771da177e4SLinus Torvalds #include <linux/init.h>
781da177e4SLinus Torvalds #include <linux/skbuff.h>
791da177e4SLinus Torvalds #include <linux/inetdevice.h>
801da177e4SLinus Torvalds #include <linux/igmp.h>
811da177e4SLinus Torvalds #include <linux/pkt_sched.h>
821da177e4SLinus Torvalds #include <linux/mroute.h>
831da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
841da177e4SLinus Torvalds #include <linux/random.h>
851da177e4SLinus Torvalds #include <linux/rcupdate.h>
861da177e4SLinus Torvalds #include <linux/times.h>
875a0e3ad6STejun Heo #include <linux/slab.h>
8873f156a6SEric Dumazet #include <linux/jhash.h>
89352e512cSHerbert Xu #include <net/dst.h>
901b7179d3SThomas Graf #include <net/dst_metadata.h>
91457c4cbcSEric W. Biederman #include <net/net_namespace.h>
921da177e4SLinus Torvalds #include <net/protocol.h>
931da177e4SLinus Torvalds #include <net/ip.h>
941da177e4SLinus Torvalds #include <net/route.h>
951da177e4SLinus Torvalds #include <net/inetpeer.h>
961da177e4SLinus Torvalds #include <net/sock.h>
971da177e4SLinus Torvalds #include <net/ip_fib.h>
985481d73fSDavid Ahern #include <net/nexthop.h>
991da177e4SLinus Torvalds #include <net/arp.h>
1001da177e4SLinus Torvalds #include <net/tcp.h>
1011da177e4SLinus Torvalds #include <net/icmp.h>
1021da177e4SLinus Torvalds #include <net/xfrm.h>
103571e7226SRoopa Prabhu #include <net/lwtunnel.h>
1048d71740cSTom Tucker #include <net/netevent.h>
10563f3444fSThomas Graf #include <net/rtnetlink.h>
1061da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
1071da177e4SLinus Torvalds #include <linux/sysctl.h>
1081da177e4SLinus Torvalds #endif
1096e5714eaSDavid S. Miller #include <net/secure_seq.h>
1101b7179d3SThomas Graf #include <net/ip_tunnels.h>
111385add90SDavid Ahern #include <net/l3mdev.h>
1121da177e4SLinus Torvalds 
113b6179813SRoopa Prabhu #include "fib_lookup.h"
114b6179813SRoopa Prabhu 
/*
 * Extract the TOS bits of a flow key that this file treats as relevant:
 * flowi4_tos masked with IPTOS_RT_MASK plus the RTO_ONLINK flag bit.
 */
#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
1171da177e4SLinus Torvalds 
/* Default for ip_rt_gc_timeout below: 300 * HZ jiffies. */
#define RT_GC_TIMEOUT (300*HZ)

/*
 * File-local routing tunables.  Defaults written in terms of HZ are
 * jiffies counts.
 * NOTE(review): the consumers of these values are outside this excerpt;
 * their exact meaning should be confirmed against the code that reads them.
 */
static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
/* Equals (default redirect_load) << (default redirect_number + 1). */
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
/* NOTE(review): 512 + 20 + 20 is presumably payload + IP + TCP header sizes -- confirm. */
static u32 ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;

static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
131c7272c2fSSabrina Dubroca 
1321da177e4SLinus Torvalds /*
1331da177e4SLinus Torvalds  *	Interface to generic destination cache.
1341da177e4SLinus Torvalds  */
1351da177e4SLinus Torvalds 
/*
 * Forward declarations of the callbacks that are wired into ipv4_dst_ops
 * below; they must be visible before that table is initialized.
 */
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void		 ipv4_link_failure(struct sk_buff *skb);
static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		ipv4_dst_destroy(struct dst_entry *dst);
1461da177e4SLinus Torvalds 
14762fa8a84SDavid S. Miller static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
14862fa8a84SDavid S. Miller {
14931248731SDavid S. Miller 	WARN_ON(1);
15031248731SDavid S. Miller 	return NULL;
15162fa8a84SDavid S. Miller }
15262fa8a84SDavid S. Miller 
/*
 * Forward declarations of the neighbour-related callbacks referenced by
 * ipv4_dst_ops below.
 */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);
157d3aaeb38SDavid S. Miller 
/*
 * IPv4 operations table for the generic destination cache.  Every member
 * points at a callback declared or defined above in this file, except
 * .local_out, which is set to __ip_local_out.
 */
static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
	.confirm_neigh =	ipv4_confirm_neigh,
};
1731da177e4SLinus Torvalds 
/*
 * ECN_OR_COST(class) expands to TC_PRIO_<class>, so in the table below
 * each odd-indexed entry carries the same priority as the even-indexed
 * entry that precedes it.
 */
#define ECN_OR_COST(class)	TC_PRIO_##class

/*
 * 16-entry lookup table yielding a TC_PRIO_* packet priority.
 * NOTE(review): presumably indexed by the 4-bit IP TOS field value --
 * confirm against the users of this exported table.
 */
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
1951da177e4SLinus Torvalds 
/* Per-CPU routing statistics counters. */
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
/* Bump one field of the current CPU's counters using raw_cpu_inc(). */
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
1981da177e4SLinus Torvalds 
1991da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
2001da177e4SLinus Torvalds static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
2011da177e4SLinus Torvalds {
20229e75252SEric Dumazet 	if (*pos)
20389aef892SDavid S. Miller 		return NULL;
20429e75252SEric Dumazet 	return SEQ_START_TOKEN;
2051da177e4SLinus Torvalds }
2061da177e4SLinus Torvalds 
2071da177e4SLinus Torvalds static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2081da177e4SLinus Torvalds {
2091da177e4SLinus Torvalds 	++*pos;
21089aef892SDavid S. Miller 	return NULL;
2111da177e4SLinus Torvalds }
2121da177e4SLinus Torvalds 
/* seq_file ->stop for the "rt_cache" listing: intentionally a no-op. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
2161da177e4SLinus Torvalds 
2171da177e4SLinus Torvalds static int rt_cache_seq_show(struct seq_file *seq, void *v)
2181da177e4SLinus Torvalds {
2191da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN)
2201da177e4SLinus Torvalds 		seq_printf(seq, "%-127s\n",
2211da177e4SLinus Torvalds 			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
2221da177e4SLinus Torvalds 			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
2231da177e4SLinus Torvalds 			   "HHUptod\tSpecDst");
2241da177e4SLinus Torvalds 	return 0;
2251da177e4SLinus Torvalds }
2261da177e4SLinus Torvalds 
/*
 * seq_file iterator for the "rt_cache" listing; with the callbacks above
 * it yields only the header record.
 */
static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};
2331da177e4SLinus Torvalds 
2341da177e4SLinus Torvalds static int rt_cache_seq_open(struct inode *inode, struct file *file)
2351da177e4SLinus Torvalds {
23689aef892SDavid S. Miller 	return seq_open(file, &rt_cache_seq_ops);
2371da177e4SLinus Torvalds }
2381da177e4SLinus Torvalds 
/*
 * File operations for the "rt_cache" listing: a custom open handler plus
 * the stock seq_file read/llseek/release helpers.
 */
static const struct file_operations rt_cache_seq_fops = {
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds 
2471da177e4SLinus Torvalds static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
2481da177e4SLinus Torvalds {
2491da177e4SLinus Torvalds 	int cpu;
2501da177e4SLinus Torvalds 
2511da177e4SLinus Torvalds 	if (*pos == 0)
2521da177e4SLinus Torvalds 		return SEQ_START_TOKEN;
2531da177e4SLinus Torvalds 
2540f23174aSRusty Russell 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2551da177e4SLinus Torvalds 		if (!cpu_possible(cpu))
2561da177e4SLinus Torvalds 			continue;
2571da177e4SLinus Torvalds 		*pos = cpu+1;
2582f970d83SEric Dumazet 		return &per_cpu(rt_cache_stat, cpu);
2591da177e4SLinus Torvalds 	}
2601da177e4SLinus Torvalds 	return NULL;
2611da177e4SLinus Torvalds }
2621da177e4SLinus Torvalds 
2631da177e4SLinus Torvalds static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2641da177e4SLinus Torvalds {
2651da177e4SLinus Torvalds 	int cpu;
2661da177e4SLinus Torvalds 
2670f23174aSRusty Russell 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2681da177e4SLinus Torvalds 		if (!cpu_possible(cpu))
2691da177e4SLinus Torvalds 			continue;
2701da177e4SLinus Torvalds 		*pos = cpu+1;
2712f970d83SEric Dumazet 		return &per_cpu(rt_cache_stat, cpu);
2721da177e4SLinus Torvalds 	}
2731da177e4SLinus Torvalds 	return NULL;
2741da177e4SLinus Torvalds 
2751da177e4SLinus Torvalds }
2761da177e4SLinus Torvalds 
/* seq_file ->stop for the per-CPU statistics listing: intentionally a no-op. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}
2811da177e4SLinus Torvalds 
2821da177e4SLinus Torvalds static int rt_cpu_seq_show(struct seq_file *seq, void *v)
2831da177e4SLinus Torvalds {
2841da177e4SLinus Torvalds 	struct rt_cache_stat *st = v;
2851da177e4SLinus Torvalds 
2861da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
2875bec0039SOlaf Rempel 		seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
2881da177e4SLinus Torvalds 		return 0;
2891da177e4SLinus Torvalds 	}
2901da177e4SLinus Torvalds 
2911da177e4SLinus Torvalds 	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
2921da177e4SLinus Torvalds 		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
293fc66f95cSEric Dumazet 		   dst_entries_get_slow(&ipv4_dst_ops),
2940baf2b35SEric Dumazet 		   0, /* st->in_hit */
2951da177e4SLinus Torvalds 		   st->in_slow_tot,
2961da177e4SLinus Torvalds 		   st->in_slow_mc,
2971da177e4SLinus Torvalds 		   st->in_no_route,
2981da177e4SLinus Torvalds 		   st->in_brd,
2991da177e4SLinus Torvalds 		   st->in_martian_dst,
3001da177e4SLinus Torvalds 		   st->in_martian_src,
3011da177e4SLinus Torvalds 
3020baf2b35SEric Dumazet 		   0, /* st->out_hit */
3031da177e4SLinus Torvalds 		   st->out_slow_tot,
3041da177e4SLinus Torvalds 		   st->out_slow_mc,
3051da177e4SLinus Torvalds 
3060baf2b35SEric Dumazet 		   0, /* st->gc_total */
3070baf2b35SEric Dumazet 		   0, /* st->gc_ignored */
3080baf2b35SEric Dumazet 		   0, /* st->gc_goal_miss */
3090baf2b35SEric Dumazet 		   0, /* st->gc_dst_overflow */
3100baf2b35SEric Dumazet 		   0, /* st->in_hlist_search */
3110baf2b35SEric Dumazet 		   0  /* st->out_hlist_search */
3121da177e4SLinus Torvalds 		);
3131da177e4SLinus Torvalds 	return 0;
3141da177e4SLinus Torvalds }
3151da177e4SLinus Torvalds 
/*
 * seq_file iterator for the per-CPU statistics listing: a header record
 * followed by one record per possible CPU.
 */
static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};
3221da177e4SLinus Torvalds 
3231da177e4SLinus Torvalds 
3241da177e4SLinus Torvalds static int rt_cpu_seq_open(struct inode *inode, struct file *file)
3251da177e4SLinus Torvalds {
3261da177e4SLinus Torvalds 	return seq_open(file, &rt_cpu_seq_ops);
3271da177e4SLinus Torvalds }
3281da177e4SLinus Torvalds 
/*
 * File operations for the per-CPU statistics listing: a custom open
 * handler plus the stock seq_file read/llseek/release helpers.
 */
static const struct file_operations rt_cpu_seq_fops = {
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
3351da177e4SLinus Torvalds 
336c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
337a661c419SAlexey Dobriyan static int rt_acct_proc_show(struct seq_file *m, void *v)
33878c686e9SPavel Emelyanov {
339a661c419SAlexey Dobriyan 	struct ip_rt_acct *dst, *src;
340a661c419SAlexey Dobriyan 	unsigned int i, j;
34178c686e9SPavel Emelyanov 
342a661c419SAlexey Dobriyan 	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
343a661c419SAlexey Dobriyan 	if (!dst)
344a661c419SAlexey Dobriyan 		return -ENOMEM;
34578c686e9SPavel Emelyanov 
346a661c419SAlexey Dobriyan 	for_each_possible_cpu(i) {
347a661c419SAlexey Dobriyan 		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
348a661c419SAlexey Dobriyan 		for (j = 0; j < 256; j++) {
349a661c419SAlexey Dobriyan 			dst[j].o_bytes   += src[j].o_bytes;
350a661c419SAlexey Dobriyan 			dst[j].o_packets += src[j].o_packets;
351a661c419SAlexey Dobriyan 			dst[j].i_bytes   += src[j].i_bytes;
352a661c419SAlexey Dobriyan 			dst[j].i_packets += src[j].i_packets;
353a661c419SAlexey Dobriyan 		}
354a661c419SAlexey Dobriyan 	}
355a661c419SAlexey Dobriyan 
356a661c419SAlexey Dobriyan 	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
357a661c419SAlexey Dobriyan 	kfree(dst);
35878c686e9SPavel Emelyanov 	return 0;
35978c686e9SPavel Emelyanov }
36078c686e9SPavel Emelyanov #endif
361107f1634SPavel Emelyanov 
36273b38711SDenis V. Lunev static int __net_init ip_rt_do_proc_init(struct net *net)
363107f1634SPavel Emelyanov {
364107f1634SPavel Emelyanov 	struct proc_dir_entry *pde;
365107f1634SPavel Emelyanov 
366d6444062SJoe Perches 	pde = proc_create("rt_cache", 0444, net->proc_net,
367107f1634SPavel Emelyanov 			  &rt_cache_seq_fops);
368107f1634SPavel Emelyanov 	if (!pde)
369107f1634SPavel Emelyanov 		goto err1;
370107f1634SPavel Emelyanov 
371d6444062SJoe Perches 	pde = proc_create("rt_cache", 0444,
37277020720SWang Chen 			  net->proc_net_stat, &rt_cpu_seq_fops);
373107f1634SPavel Emelyanov 	if (!pde)
374107f1634SPavel Emelyanov 		goto err2;
375107f1634SPavel Emelyanov 
376c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
3773f3942acSChristoph Hellwig 	pde = proc_create_single("rt_acct", 0, net->proc_net,
3783f3942acSChristoph Hellwig 			rt_acct_proc_show);
379107f1634SPavel Emelyanov 	if (!pde)
380107f1634SPavel Emelyanov 		goto err3;
381107f1634SPavel Emelyanov #endif
382107f1634SPavel Emelyanov 	return 0;
383107f1634SPavel Emelyanov 
384c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
385107f1634SPavel Emelyanov err3:
386107f1634SPavel Emelyanov 	remove_proc_entry("rt_cache", net->proc_net_stat);
387107f1634SPavel Emelyanov #endif
388107f1634SPavel Emelyanov err2:
389107f1634SPavel Emelyanov 	remove_proc_entry("rt_cache", net->proc_net);
390107f1634SPavel Emelyanov err1:
391107f1634SPavel Emelyanov 	return -ENOMEM;
392107f1634SPavel Emelyanov }
39373b38711SDenis V. Lunev 
39473b38711SDenis V. Lunev static void __net_exit ip_rt_do_proc_exit(struct net *net)
39573b38711SDenis V. Lunev {
39673b38711SDenis V. Lunev 	remove_proc_entry("rt_cache", net->proc_net_stat);
39773b38711SDenis V. Lunev 	remove_proc_entry("rt_cache", net->proc_net);
398c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
39973b38711SDenis V. Lunev 	remove_proc_entry("rt_acct", net->proc_net);
4000a931acfSAlexey Dobriyan #endif
40173b38711SDenis V. Lunev }
40273b38711SDenis V. Lunev 
/*
 * Per-network-namespace hooks that create and remove the routing proc
 * files; registered by ip_rt_proc_init().
 */
static struct pernet_operations ip_rt_proc_ops __net_initdata =  {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};
40773b38711SDenis V. Lunev 
40873b38711SDenis V. Lunev static int __init ip_rt_proc_init(void)
40973b38711SDenis V. Lunev {
41073b38711SDenis V. Lunev 	return register_pernet_subsys(&ip_rt_proc_ops);
41173b38711SDenis V. Lunev }
41273b38711SDenis V. Lunev 
413107f1634SPavel Emelyanov #else
/* Stub used when CONFIG_PROC_FS is disabled: nothing to register, report success. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
4181da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
4191da177e4SLinus Torvalds 
4204331debcSEric Dumazet static inline bool rt_is_expired(const struct rtable *rth)
421e84f84f2SDenis V. Lunev {
422ca4c3fc2Sfan.du 	return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
423e84f84f2SDenis V. Lunev }
424e84f84f2SDenis V. Lunev 
/*
 * Bump the IPv4 route generation id of @net.  Routes carrying an older
 * generation id are then reported as expired by rt_is_expired().
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}
42998376387SEric Dumazet 
430f894cbf8SDavid S. Miller static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
431f894cbf8SDavid S. Miller 					   struct sk_buff *skb,
432f894cbf8SDavid S. Miller 					   const void *daddr)
4333769cffbSDavid Miller {
4341550c171SDavid Ahern 	const struct rtable *rt = container_of(dst, struct rtable, dst);
435d3aaeb38SDavid S. Miller 	struct net_device *dev = dst->dev;
4363769cffbSDavid Miller 	struct neighbour *n;
4373769cffbSDavid Miller 
4385c9f7c1dSDavid Ahern 	rcu_read_lock_bh();
439d3aaeb38SDavid S. Miller 
4405c9f7c1dSDavid Ahern 	if (likely(rt->rt_gw_family == AF_INET)) {
4415c9f7c1dSDavid Ahern 		n = ip_neigh_gw4(dev, rt->rt_gw4);
4425c9f7c1dSDavid Ahern 	} else if (rt->rt_gw_family == AF_INET6) {
4435c9f7c1dSDavid Ahern 		n = ip_neigh_gw6(dev, &rt->rt_gw6);
4445c9f7c1dSDavid Ahern         } else {
4455c9f7c1dSDavid Ahern 		__be32 pkey;
4465c9f7c1dSDavid Ahern 
4475c9f7c1dSDavid Ahern 		pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
4485c9f7c1dSDavid Ahern 		n = ip_neigh_gw4(dev, pkey);
4495c9f7c1dSDavid Ahern 	}
4505c9f7c1dSDavid Ahern 
4515c9f7c1dSDavid Ahern 	if (n && !refcount_inc_not_zero(&n->refcnt))
4525c9f7c1dSDavid Ahern 		n = NULL;
4535c9f7c1dSDavid Ahern 
4545c9f7c1dSDavid Ahern 	rcu_read_unlock_bh();
4555c9f7c1dSDavid Ahern 
456d3aaeb38SDavid S. Miller 	return n;
457d3aaeb38SDavid S. Miller }
458d3aaeb38SDavid S. Miller 
45963fca65dSJulian Anastasov static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
46063fca65dSJulian Anastasov {
4611550c171SDavid Ahern 	const struct rtable *rt = container_of(dst, struct rtable, dst);
46263fca65dSJulian Anastasov 	struct net_device *dev = dst->dev;
46363fca65dSJulian Anastasov 	const __be32 *pkey = daddr;
46463fca65dSJulian Anastasov 
4656de9c055SDavid Ahern 	if (rt->rt_gw_family == AF_INET) {
4661550c171SDavid Ahern 		pkey = (const __be32 *)&rt->rt_gw4;
4676de9c055SDavid Ahern 	} else if (rt->rt_gw_family == AF_INET6) {
4686de9c055SDavid Ahern 		return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
4696de9c055SDavid Ahern 	} else if (!daddr ||
47063fca65dSJulian Anastasov 		 (rt->rt_flags &
4716de9c055SDavid Ahern 		  (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
47263fca65dSJulian Anastasov 		return;
4736de9c055SDavid Ahern 	}
47463fca65dSJulian Anastasov 	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
47563fca65dSJulian Anastasov }
47663fca65dSJulian Anastasov 
/* Number of slots that ip_idents_reserve() indexes via hash % IP_IDENTS_SZ. */
#define IP_IDENTS_SZ 2048u

/* Per-slot identifier counters, indexed by hash % IP_IDENTS_SZ. */
static atomic_t *ip_idents __read_mostly;
/* Per-slot jiffies stamp of the last use, indexed like ip_idents. */
static u32 *ip_tstamps __read_mostly;
48104ca6973SEric Dumazet 
48204ca6973SEric Dumazet /* In order to protect privacy, we add a perturbation to identifiers
48304ca6973SEric Dumazet  * if one generator is seldom used. This makes hard for an attacker
48404ca6973SEric Dumazet  * to infer how many packets were sent between two points in time.
48504ca6973SEric Dumazet  */
48604ca6973SEric Dumazet u32 ip_idents_reserve(u32 hash, int segs)
48704ca6973SEric Dumazet {
488355b590cSEric Dumazet 	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
489355b590cSEric Dumazet 	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
4906aa7de05SMark Rutland 	u32 old = READ_ONCE(*p_tstamp);
49104ca6973SEric Dumazet 	u32 now = (u32)jiffies;
492adb03115SEric Dumazet 	u32 new, delta = 0;
49304ca6973SEric Dumazet 
494355b590cSEric Dumazet 	if (old != now && cmpxchg(p_tstamp, old, now) == old)
49504ca6973SEric Dumazet 		delta = prandom_u32_max(now - old);
49604ca6973SEric Dumazet 
497adb03115SEric Dumazet 	/* Do not use atomic_add_return() as it makes UBSAN unhappy */
498adb03115SEric Dumazet 	do {
499adb03115SEric Dumazet 		old = (u32)atomic_read(p_id);
500adb03115SEric Dumazet 		new = old + delta + segs;
501adb03115SEric Dumazet 	} while (atomic_cmpxchg(p_id, old, new) != old);
502adb03115SEric Dumazet 
503adb03115SEric Dumazet 	return new - segs;
50404ca6973SEric Dumazet }
50504ca6973SEric Dumazet EXPORT_SYMBOL(ip_idents_reserve);
50673f156a6SEric Dumazet 
507b6a7719aSHannes Frederic Sowa void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
5081da177e4SLinus Torvalds {
50973f156a6SEric Dumazet 	u32 hash, id;
5101da177e4SLinus Torvalds 
511df453700SEric Dumazet 	/* Note the following code is not safe, but this is okay. */
512df453700SEric Dumazet 	if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
513df453700SEric Dumazet 		get_random_bytes(&net->ipv4.ip_id_key,
514df453700SEric Dumazet 				 sizeof(net->ipv4.ip_id_key));
5151da177e4SLinus Torvalds 
516df453700SEric Dumazet 	hash = siphash_3u32((__force u32)iph->daddr,
51704ca6973SEric Dumazet 			    (__force u32)iph->saddr,
518df453700SEric Dumazet 			    iph->protocol,
519df453700SEric Dumazet 			    &net->ipv4.ip_id_key);
52073f156a6SEric Dumazet 	id = ip_idents_reserve(hash, segs);
52173f156a6SEric Dumazet 	iph->id = htons(id);
5221da177e4SLinus Torvalds }
5234bc2f18bSEric Dumazet EXPORT_SYMBOL(__ip_select_ident);
5241da177e4SLinus Torvalds 
525e2d118a1SLorenzo Colitti static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
526e2d118a1SLorenzo Colitti 			     const struct sock *sk,
5274895c771SDavid S. Miller 			     const struct iphdr *iph,
5284895c771SDavid S. Miller 			     int oif, u8 tos,
5294895c771SDavid S. Miller 			     u8 prot, u32 mark, int flow_flags)
5304895c771SDavid S. Miller {
5314895c771SDavid S. Miller 	if (sk) {
5324895c771SDavid S. Miller 		const struct inet_sock *inet = inet_sk(sk);
5334895c771SDavid S. Miller 
5344895c771SDavid S. Miller 		oif = sk->sk_bound_dev_if;
5354895c771SDavid S. Miller 		mark = sk->sk_mark;
5364895c771SDavid S. Miller 		tos = RT_CONN_FLAGS(sk);
5374895c771SDavid S. Miller 		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
5384895c771SDavid S. Miller 	}
5394895c771SDavid S. Miller 	flowi4_init_output(fl4, oif, mark, tos,
5404895c771SDavid S. Miller 			   RT_SCOPE_UNIVERSE, prot,
5414895c771SDavid S. Miller 			   flow_flags,
542e2d118a1SLorenzo Colitti 			   iph->daddr, iph->saddr, 0, 0,
543e2d118a1SLorenzo Colitti 			   sock_net_uid(net, sk));
5444895c771SDavid S. Miller }
5454895c771SDavid S. Miller 
5465abf7f7eSEric Dumazet static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
5475abf7f7eSEric Dumazet 			       const struct sock *sk)
5484895c771SDavid S. Miller {
549d109e61bSLorenzo Colitti 	const struct net *net = dev_net(skb->dev);
5504895c771SDavid S. Miller 	const struct iphdr *iph = ip_hdr(skb);
5514895c771SDavid S. Miller 	int oif = skb->dev->ifindex;
5524895c771SDavid S. Miller 	u8 tos = RT_TOS(iph->tos);
5534895c771SDavid S. Miller 	u8 prot = iph->protocol;
5544895c771SDavid S. Miller 	u32 mark = skb->mark;
5554895c771SDavid S. Miller 
556d109e61bSLorenzo Colitti 	__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
5574895c771SDavid S. Miller }
5584895c771SDavid S. Miller 
5595abf7f7eSEric Dumazet static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
5604895c771SDavid S. Miller {
5614895c771SDavid S. Miller 	const struct inet_sock *inet = inet_sk(sk);
5625abf7f7eSEric Dumazet 	const struct ip_options_rcu *inet_opt;
5634895c771SDavid S. Miller 	__be32 daddr = inet->inet_daddr;
5644895c771SDavid S. Miller 
5654895c771SDavid S. Miller 	rcu_read_lock();
5664895c771SDavid S. Miller 	inet_opt = rcu_dereference(inet->inet_opt);
5674895c771SDavid S. Miller 	if (inet_opt && inet_opt->opt.srr)
5684895c771SDavid S. Miller 		daddr = inet_opt->opt.faddr;
5694895c771SDavid S. Miller 	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
5704895c771SDavid S. Miller 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
5714895c771SDavid S. Miller 			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
5724895c771SDavid S. Miller 			   inet_sk_flowi_flags(sk),
573e2d118a1SLorenzo Colitti 			   daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
5744895c771SDavid S. Miller 	rcu_read_unlock();
5754895c771SDavid S. Miller }
5764895c771SDavid S. Miller 
/*
 * Build the flow key @fl4 from @skb when a packet is available, and from
 * the socket @sk alone otherwise.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb) {
		build_sk_flow_key(fl4, sk);
		return;
	}

	build_skb_flow_key(fl4, skb, sk);
}
5854895c771SDavid S. Miller 
/*
 * NOTE(review): presumably serialises updates to the next-hop exception
 * (fnhe) entries handled below -- its users are outside this excerpt,
 * confirm against them.
 */
static DEFINE_SPINLOCK(fnhe_lock);
5874895c771SDavid S. Miller 
5882ffae99dSTimo Teräs static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
5892ffae99dSTimo Teräs {
5902ffae99dSTimo Teräs 	struct rtable *rt;
5912ffae99dSTimo Teräs 
5922ffae99dSTimo Teräs 	rt = rcu_dereference(fnhe->fnhe_rth_input);
5932ffae99dSTimo Teräs 	if (rt) {
5942ffae99dSTimo Teräs 		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
59595c47f9cSWei Wang 		dst_dev_put(&rt->dst);
5960830106cSWei Wang 		dst_release(&rt->dst);
5972ffae99dSTimo Teräs 	}
5982ffae99dSTimo Teräs 	rt = rcu_dereference(fnhe->fnhe_rth_output);
5992ffae99dSTimo Teräs 	if (rt) {
6002ffae99dSTimo Teräs 		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
60195c47f9cSWei Wang 		dst_dev_put(&rt->dst);
6020830106cSWei Wang 		dst_release(&rt->dst);
6032ffae99dSTimo Teräs 	}
6042ffae99dSTimo Teräs }
6052ffae99dSTimo Teräs 
606aee06da6SJulian Anastasov static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
6074895c771SDavid S. Miller {
6084895c771SDavid S. Miller 	struct fib_nh_exception *fnhe, *oldest;
6094895c771SDavid S. Miller 
6104895c771SDavid S. Miller 	oldest = rcu_dereference(hash->chain);
6114895c771SDavid S. Miller 	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
6124895c771SDavid S. Miller 	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
6134895c771SDavid S. Miller 		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
6144895c771SDavid S. Miller 			oldest = fnhe;
6154895c771SDavid S. Miller 	}
6162ffae99dSTimo Teräs 	fnhe_flush_routes(oldest);
6174895c771SDavid S. Miller 	return oldest;
6184895c771SDavid S. Miller }
6194895c771SDavid S. Miller 
620d3a25c98SDavid S. Miller static inline u32 fnhe_hashfun(__be32 daddr)
621d3a25c98SDavid S. Miller {
622d546c621SEric Dumazet 	static u32 fnhe_hashrnd __read_mostly;
623d3a25c98SDavid S. Miller 	u32 hval;
624d3a25c98SDavid S. Miller 
625d546c621SEric Dumazet 	net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
626d546c621SEric Dumazet 	hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
627d546c621SEric Dumazet 	return hash_32(hval, FNHE_HASH_SHIFT);
628d3a25c98SDavid S. Miller }
629d3a25c98SDavid S. Miller 
630387aa65aSTimo Teräs static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
631387aa65aSTimo Teräs {
632387aa65aSTimo Teräs 	rt->rt_pmtu = fnhe->fnhe_pmtu;
633d52e5a7eSSabrina Dubroca 	rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
634387aa65aSTimo Teräs 	rt->dst.expires = fnhe->fnhe_expires;
635387aa65aSTimo Teräs 
636387aa65aSTimo Teräs 	if (fnhe->fnhe_gw) {
637387aa65aSTimo Teräs 		rt->rt_flags |= RTCF_REDIRECTED;
6381550c171SDavid Ahern 		rt->rt_gw_family = AF_INET;
6391550c171SDavid Ahern 		rt->rt_gw4 = fnhe->fnhe_gw;
640387aa65aSTimo Teräs 	}
641387aa65aSTimo Teräs }
642387aa65aSTimo Teräs 
643a5995e71SDavid Ahern static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
644a5995e71SDavid Ahern 				  __be32 gw, u32 pmtu, bool lock,
645a5995e71SDavid Ahern 				  unsigned long expires)
6464895c771SDavid S. Miller {
647aee06da6SJulian Anastasov 	struct fnhe_hash_bucket *hash;
6484895c771SDavid S. Miller 	struct fib_nh_exception *fnhe;
649387aa65aSTimo Teräs 	struct rtable *rt;
650cebe84c6SXin Long 	u32 genid, hval;
651387aa65aSTimo Teräs 	unsigned int i;
6524895c771SDavid S. Miller 	int depth;
653cebe84c6SXin Long 
654a5995e71SDavid Ahern 	genid = fnhe_genid(dev_net(nhc->nhc_dev));
655cebe84c6SXin Long 	hval = fnhe_hashfun(daddr);
6564895c771SDavid S. Miller 
657c5038a83SDavid S. Miller 	spin_lock_bh(&fnhe_lock);
658aee06da6SJulian Anastasov 
659a5995e71SDavid Ahern 	hash = rcu_dereference(nhc->nhc_exceptions);
6604895c771SDavid S. Miller 	if (!hash) {
6616396bb22SKees Cook 		hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
6624895c771SDavid S. Miller 		if (!hash)
663aee06da6SJulian Anastasov 			goto out_unlock;
664a5995e71SDavid Ahern 		rcu_assign_pointer(nhc->nhc_exceptions, hash);
6654895c771SDavid S. Miller 	}
6664895c771SDavid S. Miller 
6674895c771SDavid S. Miller 	hash += hval;
6684895c771SDavid S. Miller 
6694895c771SDavid S. Miller 	depth = 0;
6704895c771SDavid S. Miller 	for (fnhe = rcu_dereference(hash->chain); fnhe;
6714895c771SDavid S. Miller 	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
6724895c771SDavid S. Miller 		if (fnhe->fnhe_daddr == daddr)
673aee06da6SJulian Anastasov 			break;
6744895c771SDavid S. Miller 		depth++;
6754895c771SDavid S. Miller 	}
6764895c771SDavid S. Miller 
677aee06da6SJulian Anastasov 	if (fnhe) {
678cebe84c6SXin Long 		if (fnhe->fnhe_genid != genid)
679cebe84c6SXin Long 			fnhe->fnhe_genid = genid;
680aee06da6SJulian Anastasov 		if (gw)
681aee06da6SJulian Anastasov 			fnhe->fnhe_gw = gw;
682d52e5a7eSSabrina Dubroca 		if (pmtu) {
683aee06da6SJulian Anastasov 			fnhe->fnhe_pmtu = pmtu;
684d52e5a7eSSabrina Dubroca 			fnhe->fnhe_mtu_locked = lock;
685d52e5a7eSSabrina Dubroca 		}
686387aa65aSTimo Teräs 		fnhe->fnhe_expires = max(1UL, expires);
687387aa65aSTimo Teräs 		/* Update all cached dsts too */
6882ffae99dSTimo Teräs 		rt = rcu_dereference(fnhe->fnhe_rth_input);
6892ffae99dSTimo Teräs 		if (rt)
6902ffae99dSTimo Teräs 			fill_route_from_fnhe(rt, fnhe);
6912ffae99dSTimo Teräs 		rt = rcu_dereference(fnhe->fnhe_rth_output);
692387aa65aSTimo Teräs 		if (rt)
693387aa65aSTimo Teräs 			fill_route_from_fnhe(rt, fnhe);
694aee06da6SJulian Anastasov 	} else {
695aee06da6SJulian Anastasov 		if (depth > FNHE_RECLAIM_DEPTH)
696aee06da6SJulian Anastasov 			fnhe = fnhe_oldest(hash);
697aee06da6SJulian Anastasov 		else {
6984895c771SDavid S. Miller 			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
6994895c771SDavid S. Miller 			if (!fnhe)
700aee06da6SJulian Anastasov 				goto out_unlock;
7014895c771SDavid S. Miller 
7024895c771SDavid S. Miller 			fnhe->fnhe_next = hash->chain;
7034895c771SDavid S. Miller 			rcu_assign_pointer(hash->chain, fnhe);
704aee06da6SJulian Anastasov 		}
705cebe84c6SXin Long 		fnhe->fnhe_genid = genid;
7064895c771SDavid S. Miller 		fnhe->fnhe_daddr = daddr;
707aee06da6SJulian Anastasov 		fnhe->fnhe_gw = gw;
708aee06da6SJulian Anastasov 		fnhe->fnhe_pmtu = pmtu;
709d52e5a7eSSabrina Dubroca 		fnhe->fnhe_mtu_locked = lock;
71094720e3aSJulian Anastasov 		fnhe->fnhe_expires = max(1UL, expires);
711387aa65aSTimo Teräs 
712387aa65aSTimo Teräs 		/* Exception created; mark the cached routes for the nexthop
713387aa65aSTimo Teräs 		 * stale, so anyone caching it rechecks if this exception
714387aa65aSTimo Teräs 		 * applies to them.
715387aa65aSTimo Teräs 		 */
7160f457a36SDavid Ahern 		rt = rcu_dereference(nhc->nhc_rth_input);
7172ffae99dSTimo Teräs 		if (rt)
7182ffae99dSTimo Teräs 			rt->dst.obsolete = DST_OBSOLETE_KILL;
7192ffae99dSTimo Teräs 
720387aa65aSTimo Teräs 		for_each_possible_cpu(i) {
721387aa65aSTimo Teräs 			struct rtable __rcu **prt;
7220f457a36SDavid Ahern 			prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
723387aa65aSTimo Teräs 			rt = rcu_dereference(*prt);
724387aa65aSTimo Teräs 			if (rt)
725387aa65aSTimo Teräs 				rt->dst.obsolete = DST_OBSOLETE_KILL;
726387aa65aSTimo Teräs 		}
727aee06da6SJulian Anastasov 	}
728aee06da6SJulian Anastasov 
7294895c771SDavid S. Miller 	fnhe->fnhe_stamp = jiffies;
730aee06da6SJulian Anastasov 
731aee06da6SJulian Anastasov out_unlock:
732c5038a83SDavid S. Miller 	spin_unlock_bh(&fnhe_lock);
7334895c771SDavid S. Miller }
7344895c771SDavid S. Miller 
735ceb33206SDavid S. Miller static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
736ceb33206SDavid S. Miller 			     bool kill_route)
7371da177e4SLinus Torvalds {
738e47a185bSDavid S. Miller 	__be32 new_gw = icmp_hdr(skb)->un.gateway;
73994206125SDavid S. Miller 	__be32 old_gw = ip_hdr(skb)->saddr;
740e47a185bSDavid S. Miller 	struct net_device *dev = skb->dev;
741e47a185bSDavid S. Miller 	struct in_device *in_dev;
7424895c771SDavid S. Miller 	struct fib_result res;
743e47a185bSDavid S. Miller 	struct neighbour *n;
744317805b8SDenis V. Lunev 	struct net *net;
7451da177e4SLinus Torvalds 
74694206125SDavid S. Miller 	switch (icmp_hdr(skb)->code & 7) {
74794206125SDavid S. Miller 	case ICMP_REDIR_NET:
74894206125SDavid S. Miller 	case ICMP_REDIR_NETTOS:
74994206125SDavid S. Miller 	case ICMP_REDIR_HOST:
75094206125SDavid S. Miller 	case ICMP_REDIR_HOSTTOS:
75194206125SDavid S. Miller 		break;
75294206125SDavid S. Miller 
75394206125SDavid S. Miller 	default:
75494206125SDavid S. Miller 		return;
75594206125SDavid S. Miller 	}
75694206125SDavid S. Miller 
7571550c171SDavid Ahern 	if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
758e47a185bSDavid S. Miller 		return;
759e47a185bSDavid S. Miller 
760e47a185bSDavid S. Miller 	in_dev = __in_dev_get_rcu(dev);
761e47a185bSDavid S. Miller 	if (!in_dev)
762e47a185bSDavid S. Miller 		return;
763e47a185bSDavid S. Miller 
764c346dca1SYOSHIFUJI Hideaki 	net = dev_net(dev);
7659d4fb27dSJoe Perches 	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
7669d4fb27dSJoe Perches 	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
7679d4fb27dSJoe Perches 	    ipv4_is_zeronet(new_gw))
7681da177e4SLinus Torvalds 		goto reject_redirect;
7691da177e4SLinus Torvalds 
7701da177e4SLinus Torvalds 	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
7711da177e4SLinus Torvalds 		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
7721da177e4SLinus Torvalds 			goto reject_redirect;
7731da177e4SLinus Torvalds 		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
7741da177e4SLinus Torvalds 			goto reject_redirect;
7751da177e4SLinus Torvalds 	} else {
776317805b8SDenis V. Lunev 		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
7771da177e4SLinus Torvalds 			goto reject_redirect;
7781da177e4SLinus Torvalds 	}
7791da177e4SLinus Torvalds 
780969447f2SStephen Suryaputra Lin 	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
781969447f2SStephen Suryaputra Lin 	if (!n)
782969447f2SStephen Suryaputra Lin 		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
7832c1a4311SWANG Cong 	if (!IS_ERR(n)) {
784e47a185bSDavid S. Miller 		if (!(n->nud_state & NUD_VALID)) {
785e47a185bSDavid S. Miller 			neigh_event_send(n, NULL);
786e47a185bSDavid S. Miller 		} else {
7870eeb075fSAndy Gospodarek 			if (fib_lookup(net, fl4, &res, 0) == 0) {
788eba618abSDavid Ahern 				struct fib_nh_common *nhc = FIB_RES_NHC(res);
7894895c771SDavid S. Miller 
790a5995e71SDavid Ahern 				update_or_create_fnhe(nhc, fl4->daddr, new_gw,
791d52e5a7eSSabrina Dubroca 						0, false,
792d52e5a7eSSabrina Dubroca 						jiffies + ip_rt_gc_timeout);
7934895c771SDavid S. Miller 			}
794ceb33206SDavid S. Miller 			if (kill_route)
795ceb33206SDavid S. Miller 				rt->dst.obsolete = DST_OBSOLETE_KILL;
796e47a185bSDavid S. Miller 			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
797e47a185bSDavid S. Miller 		}
798e47a185bSDavid S. Miller 		neigh_release(n);
799e47a185bSDavid S. Miller 	}
800e47a185bSDavid S. Miller 	return;
801e47a185bSDavid S. Miller 
802e47a185bSDavid S. Miller reject_redirect:
803e47a185bSDavid S. Miller #ifdef CONFIG_IP_ROUTE_VERBOSE
80499ee038dSDavid S. Miller 	if (IN_DEV_LOG_MARTIANS(in_dev)) {
80599ee038dSDavid S. Miller 		const struct iphdr *iph = (const struct iphdr *) skb->data;
80699ee038dSDavid S. Miller 		__be32 daddr = iph->daddr;
80799ee038dSDavid S. Miller 		__be32 saddr = iph->saddr;
80899ee038dSDavid S. Miller 
809e47a185bSDavid S. Miller 		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
810e47a185bSDavid S. Miller 				     "  Advised path = %pI4 -> %pI4\n",
811e47a185bSDavid S. Miller 				     &old_gw, dev->name, &new_gw,
812e47a185bSDavid S. Miller 				     &saddr, &daddr);
81399ee038dSDavid S. Miller 	}
814e47a185bSDavid S. Miller #endif
815e47a185bSDavid S. Miller 	;
816e47a185bSDavid S. Miller }
817e47a185bSDavid S. Miller 
8184895c771SDavid S. Miller static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
8194895c771SDavid S. Miller {
8204895c771SDavid S. Miller 	struct rtable *rt;
8214895c771SDavid S. Miller 	struct flowi4 fl4;
822f96ef988SMichal Kubecek 	const struct iphdr *iph = (const struct iphdr *) skb->data;
8237d995694SLorenzo Colitti 	struct net *net = dev_net(skb->dev);
824f96ef988SMichal Kubecek 	int oif = skb->dev->ifindex;
825f96ef988SMichal Kubecek 	u8 tos = RT_TOS(iph->tos);
826f96ef988SMichal Kubecek 	u8 prot = iph->protocol;
827f96ef988SMichal Kubecek 	u32 mark = skb->mark;
8284895c771SDavid S. Miller 
8294895c771SDavid S. Miller 	rt = (struct rtable *) dst;
8304895c771SDavid S. Miller 
8317d995694SLorenzo Colitti 	__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
832ceb33206SDavid S. Miller 	__ip_do_redirect(rt, skb, &fl4, true);
8334895c771SDavid S. Miller }
8344895c771SDavid S. Miller 
8351da177e4SLinus Torvalds static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
8361da177e4SLinus Torvalds {
8371da177e4SLinus Torvalds 	struct rtable *rt = (struct rtable *)dst;
8381da177e4SLinus Torvalds 	struct dst_entry *ret = dst;
8391da177e4SLinus Torvalds 
8401da177e4SLinus Torvalds 	if (rt) {
841d11a4dc1STimo Teräs 		if (dst->obsolete > 0) {
8421da177e4SLinus Torvalds 			ip_rt_put(rt);
8431da177e4SLinus Torvalds 			ret = NULL;
8445943634fSDavid S. Miller 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
8455943634fSDavid S. Miller 			   rt->dst.expires) {
84689aef892SDavid S. Miller 			ip_rt_put(rt);
8471da177e4SLinus Torvalds 			ret = NULL;
8481da177e4SLinus Torvalds 		}
8491da177e4SLinus Torvalds 	}
8501da177e4SLinus Torvalds 	return ret;
8511da177e4SLinus Torvalds }
8521da177e4SLinus Torvalds 
8531da177e4SLinus Torvalds /*
8541da177e4SLinus Torvalds  * Algorithm:
8551da177e4SLinus Torvalds  *	1. The first ip_rt_redirect_number redirects are sent
8561da177e4SLinus Torvalds  *	   with exponential backoff, then we stop sending them at all,
8571da177e4SLinus Torvalds  *	   assuming that the host ignores our redirects.
8581da177e4SLinus Torvalds  *	2. If we did not see packets requiring redirects
8591da177e4SLinus Torvalds  *	   during ip_rt_redirect_silence, we assume that the host
8601da177e4SLinus Torvalds  *	   forgot redirected route and start to send redirects again.
8611da177e4SLinus Torvalds  *
8621da177e4SLinus Torvalds  * This algorithm is much cheaper and more intelligent than dumb load limiting
8631da177e4SLinus Torvalds  * in icmp.c.
8641da177e4SLinus Torvalds  *
8651da177e4SLinus Torvalds  * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
8661da177e4SLinus Torvalds  * and "frag. need" (breaks PMTU discovery) in icmp.c.
8671da177e4SLinus Torvalds  */
8681da177e4SLinus Torvalds 
8691da177e4SLinus Torvalds void ip_rt_send_redirect(struct sk_buff *skb)
8701da177e4SLinus Torvalds {
871511c3f92SEric Dumazet 	struct rtable *rt = skb_rtable(skb);
87230038fc6SEric Dumazet 	struct in_device *in_dev;
87392d86829SDavid S. Miller 	struct inet_peer *peer;
8741d861aa4SDavid S. Miller 	struct net *net;
87530038fc6SEric Dumazet 	int log_martians;
876192132b9SDavid Ahern 	int vif;
8771da177e4SLinus Torvalds 
87830038fc6SEric Dumazet 	rcu_read_lock();
879d8d1f30bSChangli Gao 	in_dev = __in_dev_get_rcu(rt->dst.dev);
88030038fc6SEric Dumazet 	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
88130038fc6SEric Dumazet 		rcu_read_unlock();
8821da177e4SLinus Torvalds 		return;
88330038fc6SEric Dumazet 	}
88430038fc6SEric Dumazet 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
885385add90SDavid Ahern 	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
88630038fc6SEric Dumazet 	rcu_read_unlock();
8871da177e4SLinus Torvalds 
8881d861aa4SDavid S. Miller 	net = dev_net(rt->dst.dev);
889192132b9SDavid Ahern 	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
89092d86829SDavid S. Miller 	if (!peer) {
891e81da0e1SJulian Anastasov 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
892e81da0e1SJulian Anastasov 			  rt_nexthop(rt, ip_hdr(skb)->daddr));
89392d86829SDavid S. Miller 		return;
89492d86829SDavid S. Miller 	}
89592d86829SDavid S. Miller 
8961da177e4SLinus Torvalds 	/* No redirected packets during ip_rt_redirect_silence;
8971da177e4SLinus Torvalds 	 * reset the algorithm.
8981da177e4SLinus Torvalds 	 */
899c09551c6SLorenzo Bianconi 	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
90092d86829SDavid S. Miller 		peer->rate_tokens = 0;
901c09551c6SLorenzo Bianconi 		peer->n_redirects = 0;
902c09551c6SLorenzo Bianconi 	}
9031da177e4SLinus Torvalds 
9041da177e4SLinus Torvalds 	/* Too many ignored redirects; do not send anything
905d8d1f30bSChangli Gao 	 * set dst.rate_last to the last seen redirected packet.
9061da177e4SLinus Torvalds 	 */
907c09551c6SLorenzo Bianconi 	if (peer->n_redirects >= ip_rt_redirect_number) {
90892d86829SDavid S. Miller 		peer->rate_last = jiffies;
9091d861aa4SDavid S. Miller 		goto out_put_peer;
9101da177e4SLinus Torvalds 	}
9111da177e4SLinus Torvalds 
9121da177e4SLinus Torvalds 	/* Check for load limit; set rate_last to the latest sent
9131da177e4SLinus Torvalds 	 * redirect.
9141da177e4SLinus Torvalds 	 */
91592d86829SDavid S. Miller 	if (peer->rate_tokens == 0 ||
91614fb8a76SLi Yewang 	    time_after(jiffies,
91792d86829SDavid S. Miller 		       (peer->rate_last +
91892d86829SDavid S. Miller 			(ip_rt_redirect_load << peer->rate_tokens)))) {
919e81da0e1SJulian Anastasov 		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
920e81da0e1SJulian Anastasov 
921e81da0e1SJulian Anastasov 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
92292d86829SDavid S. Miller 		peer->rate_last = jiffies;
92392d86829SDavid S. Miller 		++peer->rate_tokens;
924c09551c6SLorenzo Bianconi 		++peer->n_redirects;
9251da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE
92630038fc6SEric Dumazet 		if (log_martians &&
927e87cc472SJoe Perches 		    peer->rate_tokens == ip_rt_redirect_number)
928e87cc472SJoe Perches 			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
92992101b3bSDavid S. Miller 					     &ip_hdr(skb)->saddr, inet_iif(skb),
930e81da0e1SJulian Anastasov 					     &ip_hdr(skb)->daddr, &gw);
9311da177e4SLinus Torvalds #endif
9321da177e4SLinus Torvalds 	}
9331d861aa4SDavid S. Miller out_put_peer:
9341d861aa4SDavid S. Miller 	inet_putpeer(peer);
9351da177e4SLinus Torvalds }
9361da177e4SLinus Torvalds 
9371da177e4SLinus Torvalds static int ip_error(struct sk_buff *skb)
9381da177e4SLinus Torvalds {
939511c3f92SEric Dumazet 	struct rtable *rt = skb_rtable(skb);
940e2c0dc1fSStephen Suryaputra 	struct net_device *dev = skb->dev;
941e2c0dc1fSStephen Suryaputra 	struct in_device *in_dev;
94292d86829SDavid S. Miller 	struct inet_peer *peer;
9431da177e4SLinus Torvalds 	unsigned long now;
944251da413SDavid S. Miller 	struct net *net;
94592d86829SDavid S. Miller 	bool send;
9461da177e4SLinus Torvalds 	int code;
9471da177e4SLinus Torvalds 
948e2c0dc1fSStephen Suryaputra 	if (netif_is_l3_master(skb->dev)) {
949e2c0dc1fSStephen Suryaputra 		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
950e2c0dc1fSStephen Suryaputra 		if (!dev)
951e2c0dc1fSStephen Suryaputra 			goto out;
952e2c0dc1fSStephen Suryaputra 	}
953e2c0dc1fSStephen Suryaputra 
954e2c0dc1fSStephen Suryaputra 	in_dev = __in_dev_get_rcu(dev);
955e2c0dc1fSStephen Suryaputra 
956381c759dSEric W. Biederman 	/* IP on this device is disabled. */
957381c759dSEric W. Biederman 	if (!in_dev)
958381c759dSEric W. Biederman 		goto out;
959381c759dSEric W. Biederman 
960251da413SDavid S. Miller 	net = dev_net(rt->dst.dev);
961251da413SDavid S. Miller 	if (!IN_DEV_FORWARD(in_dev)) {
962251da413SDavid S. Miller 		switch (rt->dst.error) {
963251da413SDavid S. Miller 		case EHOSTUNREACH:
964b45386efSEric Dumazet 			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
965251da413SDavid S. Miller 			break;
966251da413SDavid S. Miller 
967251da413SDavid S. Miller 		case ENETUNREACH:
968b45386efSEric Dumazet 			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
969251da413SDavid S. Miller 			break;
970251da413SDavid S. Miller 		}
971251da413SDavid S. Miller 		goto out;
972251da413SDavid S. Miller 	}
973251da413SDavid S. Miller 
974d8d1f30bSChangli Gao 	switch (rt->dst.error) {
9751da177e4SLinus Torvalds 	case EINVAL:
9761da177e4SLinus Torvalds 	default:
9771da177e4SLinus Torvalds 		goto out;
9781da177e4SLinus Torvalds 	case EHOSTUNREACH:
9791da177e4SLinus Torvalds 		code = ICMP_HOST_UNREACH;
9801da177e4SLinus Torvalds 		break;
9811da177e4SLinus Torvalds 	case ENETUNREACH:
9821da177e4SLinus Torvalds 		code = ICMP_NET_UNREACH;
983b45386efSEric Dumazet 		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
9841da177e4SLinus Torvalds 		break;
9851da177e4SLinus Torvalds 	case EACCES:
9861da177e4SLinus Torvalds 		code = ICMP_PKT_FILTERED;
9871da177e4SLinus Torvalds 		break;
9881da177e4SLinus Torvalds 	}
9891da177e4SLinus Torvalds 
990192132b9SDavid Ahern 	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
991385add90SDavid Ahern 			       l3mdev_master_ifindex(skb->dev), 1);
99292d86829SDavid S. Miller 
99392d86829SDavid S. Miller 	send = true;
99492d86829SDavid S. Miller 	if (peer) {
9951da177e4SLinus Torvalds 		now = jiffies;
99692d86829SDavid S. Miller 		peer->rate_tokens += now - peer->rate_last;
99792d86829SDavid S. Miller 		if (peer->rate_tokens > ip_rt_error_burst)
99892d86829SDavid S. Miller 			peer->rate_tokens = ip_rt_error_burst;
99992d86829SDavid S. Miller 		peer->rate_last = now;
100092d86829SDavid S. Miller 		if (peer->rate_tokens >= ip_rt_error_cost)
100192d86829SDavid S. Miller 			peer->rate_tokens -= ip_rt_error_cost;
100292d86829SDavid S. Miller 		else
100392d86829SDavid S. Miller 			send = false;
10041d861aa4SDavid S. Miller 		inet_putpeer(peer);
10051da177e4SLinus Torvalds 	}
100692d86829SDavid S. Miller 	if (send)
100792d86829SDavid S. Miller 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
10081da177e4SLinus Torvalds 
10091da177e4SLinus Torvalds out:	kfree_skb(skb);
10101da177e4SLinus Torvalds 	return 0;
10111da177e4SLinus Torvalds }
10121da177e4SLinus Torvalds 
1013d851c12bSSteffen Klassert static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
10141da177e4SLinus Torvalds {
1015d851c12bSSteffen Klassert 	struct dst_entry *dst = &rt->dst;
101628d35bcdSSabrina Dubroca 	u32 old_mtu = ipv4_mtu(dst);
10174895c771SDavid S. Miller 	struct fib_result res;
1018d52e5a7eSSabrina Dubroca 	bool lock = false;
10192c8cec5cSDavid S. Miller 
1020d52e5a7eSSabrina Dubroca 	if (ip_mtu_locked(dst))
1021fa1e492aSSteffen Klassert 		return;
1022fa1e492aSSteffen Klassert 
102328d35bcdSSabrina Dubroca 	if (old_mtu < mtu)
10243cdaa5beSLi Wei 		return;
10253cdaa5beSLi Wei 
1026d52e5a7eSSabrina Dubroca 	if (mtu < ip_rt_min_pmtu) {
1027d52e5a7eSSabrina Dubroca 		lock = true;
102828d35bcdSSabrina Dubroca 		mtu = min(old_mtu, ip_rt_min_pmtu);
1029d52e5a7eSSabrina Dubroca 	}
103046af3180SHiroaki SHIMODA 
103128d35bcdSSabrina Dubroca 	if (rt->rt_pmtu == mtu && !lock &&
1032f016229eSTimo Teräs 	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
1033f016229eSTimo Teräs 		return;
1034f016229eSTimo Teräs 
1035c5ae7d41SEric Dumazet 	rcu_read_lock();
10360eeb075fSAndy Gospodarek 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
1037eba618abSDavid Ahern 		struct fib_nh_common *nhc = FIB_RES_NHC(res);
10384895c771SDavid S. Miller 
1039a5995e71SDavid Ahern 		update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
1040aee06da6SJulian Anastasov 				      jiffies + ip_rt_mtu_expires);
10414895c771SDavid S. Miller 	}
1042c5ae7d41SEric Dumazet 	rcu_read_unlock();
10431da177e4SLinus Torvalds }
10441da177e4SLinus Torvalds 
10454895c771SDavid S. Miller static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
10464895c771SDavid S. Miller 			      struct sk_buff *skb, u32 mtu)
10474895c771SDavid S. Miller {
10484895c771SDavid S. Miller 	struct rtable *rt = (struct rtable *) dst;
10494895c771SDavid S. Miller 	struct flowi4 fl4;
10504895c771SDavid S. Miller 
10514895c771SDavid S. Miller 	ip_rt_build_flow_key(&fl4, sk, skb);
1052d851c12bSSteffen Klassert 	__ip_rt_update_pmtu(rt, &fl4, mtu);
10534895c771SDavid S. Miller }
10544895c771SDavid S. Miller 
105536393395SDavid S. Miller void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1056d888f396SMaciej Żenczykowski 		      int oif, u8 protocol)
105736393395SDavid S. Miller {
105836393395SDavid S. Miller 	const struct iphdr *iph = (const struct iphdr *) skb->data;
105936393395SDavid S. Miller 	struct flowi4 fl4;
106036393395SDavid S. Miller 	struct rtable *rt;
1061d888f396SMaciej Żenczykowski 	u32 mark = IP4_REPLY_MARK(net, skb->mark);
10621b3c61dcSLorenzo Colitti 
1063e2d118a1SLorenzo Colitti 	__build_flow_key(net, &fl4, NULL, iph, oif,
1064d888f396SMaciej Żenczykowski 			 RT_TOS(iph->tos), protocol, mark, 0);
106536393395SDavid S. Miller 	rt = __ip_route_output_key(net, &fl4);
106636393395SDavid S. Miller 	if (!IS_ERR(rt)) {
10674895c771SDavid S. Miller 		__ip_rt_update_pmtu(rt, &fl4, mtu);
106836393395SDavid S. Miller 		ip_rt_put(rt);
106936393395SDavid S. Miller 	}
107036393395SDavid S. Miller }
107136393395SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
107236393395SDavid S. Miller 
10739cb3a50cSSteffen Klassert static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
107436393395SDavid S. Miller {
10754895c771SDavid S. Miller 	const struct iphdr *iph = (const struct iphdr *) skb->data;
10764895c771SDavid S. Miller 	struct flowi4 fl4;
10774895c771SDavid S. Miller 	struct rtable *rt;
107836393395SDavid S. Miller 
1079e2d118a1SLorenzo Colitti 	__build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
10801b3c61dcSLorenzo Colitti 
10811b3c61dcSLorenzo Colitti 	if (!fl4.flowi4_mark)
10821b3c61dcSLorenzo Colitti 		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
10831b3c61dcSLorenzo Colitti 
10844895c771SDavid S. Miller 	rt = __ip_route_output_key(sock_net(sk), &fl4);
10854895c771SDavid S. Miller 	if (!IS_ERR(rt)) {
10864895c771SDavid S. Miller 		__ip_rt_update_pmtu(rt, &fl4, mtu);
10874895c771SDavid S. Miller 		ip_rt_put(rt);
10884895c771SDavid S. Miller 	}
108936393395SDavid S. Miller }
10909cb3a50cSSteffen Klassert 
10919cb3a50cSSteffen Klassert void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
10929cb3a50cSSteffen Klassert {
10939cb3a50cSSteffen Klassert 	const struct iphdr *iph = (const struct iphdr *) skb->data;
10949cb3a50cSSteffen Klassert 	struct flowi4 fl4;
10959cb3a50cSSteffen Klassert 	struct rtable *rt;
10967f502361SEric Dumazet 	struct dst_entry *odst = NULL;
1097b44108dbSSteffen Klassert 	bool new = false;
1098e2d118a1SLorenzo Colitti 	struct net *net = sock_net(sk);
10999cb3a50cSSteffen Klassert 
11009cb3a50cSSteffen Klassert 	bh_lock_sock(sk);
1101482fc609SHannes Frederic Sowa 
1102482fc609SHannes Frederic Sowa 	if (!ip_sk_accept_pmtu(sk))
1103482fc609SHannes Frederic Sowa 		goto out;
1104482fc609SHannes Frederic Sowa 
11057f502361SEric Dumazet 	odst = sk_dst_get(sk);
11069cb3a50cSSteffen Klassert 
11077f502361SEric Dumazet 	if (sock_owned_by_user(sk) || !odst) {
11089cb3a50cSSteffen Klassert 		__ipv4_sk_update_pmtu(skb, sk, mtu);
11099cb3a50cSSteffen Klassert 		goto out;
11109cb3a50cSSteffen Klassert 	}
11119cb3a50cSSteffen Klassert 
1112e2d118a1SLorenzo Colitti 	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
11139cb3a50cSSteffen Klassert 
11147f502361SEric Dumazet 	rt = (struct rtable *)odst;
111551456b29SIan Morris 	if (odst->obsolete && !odst->ops->check(odst, 0)) {
11169cb3a50cSSteffen Klassert 		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
11179cb3a50cSSteffen Klassert 		if (IS_ERR(rt))
11189cb3a50cSSteffen Klassert 			goto out;
1119b44108dbSSteffen Klassert 
1120b44108dbSSteffen Klassert 		new = true;
11219cb3a50cSSteffen Klassert 	}
11229cb3a50cSSteffen Klassert 
11230f6c480fSDavid Miller 	__ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
11249cb3a50cSSteffen Klassert 
11257f502361SEric Dumazet 	if (!dst_check(&rt->dst, 0)) {
1126b44108dbSSteffen Klassert 		if (new)
1127b44108dbSSteffen Klassert 			dst_release(&rt->dst);
1128b44108dbSSteffen Klassert 
11299cb3a50cSSteffen Klassert 		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
11309cb3a50cSSteffen Klassert 		if (IS_ERR(rt))
11319cb3a50cSSteffen Klassert 			goto out;
11329cb3a50cSSteffen Klassert 
1133b44108dbSSteffen Klassert 		new = true;
11349cb3a50cSSteffen Klassert 	}
11359cb3a50cSSteffen Klassert 
1136b44108dbSSteffen Klassert 	if (new)
11377f502361SEric Dumazet 		sk_dst_set(sk, &rt->dst);
11389cb3a50cSSteffen Klassert 
11399cb3a50cSSteffen Klassert out:
11409cb3a50cSSteffen Klassert 	bh_unlock_sock(sk);
11417f502361SEric Dumazet 	dst_release(odst);
11429cb3a50cSSteffen Klassert }
114336393395SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
1144f39925dbSDavid S. Miller 
1145b42597e2SDavid S. Miller void ipv4_redirect(struct sk_buff *skb, struct net *net,
11461042caa7SMaciej Żenczykowski 		   int oif, u8 protocol)
1147b42597e2SDavid S. Miller {
1148b42597e2SDavid S. Miller 	const struct iphdr *iph = (const struct iphdr *) skb->data;
1149b42597e2SDavid S. Miller 	struct flowi4 fl4;
1150b42597e2SDavid S. Miller 	struct rtable *rt;
1151b42597e2SDavid S. Miller 
1152e2d118a1SLorenzo Colitti 	__build_flow_key(net, &fl4, NULL, iph, oif,
11531042caa7SMaciej Żenczykowski 			 RT_TOS(iph->tos), protocol, 0, 0);
1154b42597e2SDavid S. Miller 	rt = __ip_route_output_key(net, &fl4);
1155b42597e2SDavid S. Miller 	if (!IS_ERR(rt)) {
1156ceb33206SDavid S. Miller 		__ip_do_redirect(rt, skb, &fl4, false);
1157b42597e2SDavid S. Miller 		ip_rt_put(rt);
1158b42597e2SDavid S. Miller 	}
1159b42597e2SDavid S. Miller }
1160b42597e2SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_redirect);
1161b42597e2SDavid S. Miller 
1162b42597e2SDavid S. Miller void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1163b42597e2SDavid S. Miller {
11644895c771SDavid S. Miller 	const struct iphdr *iph = (const struct iphdr *) skb->data;
11654895c771SDavid S. Miller 	struct flowi4 fl4;
11664895c771SDavid S. Miller 	struct rtable *rt;
1167e2d118a1SLorenzo Colitti 	struct net *net = sock_net(sk);
1168b42597e2SDavid S. Miller 
1169e2d118a1SLorenzo Colitti 	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1170e2d118a1SLorenzo Colitti 	rt = __ip_route_output_key(net, &fl4);
11714895c771SDavid S. Miller 	if (!IS_ERR(rt)) {
1172ceb33206SDavid S. Miller 		__ip_do_redirect(rt, skb, &fl4, false);
11734895c771SDavid S. Miller 		ip_rt_put(rt);
11744895c771SDavid S. Miller 	}
1175b42597e2SDavid S. Miller }
1176b42597e2SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1177b42597e2SDavid S. Miller 
1178efbc368dSDavid S. Miller static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1179efbc368dSDavid S. Miller {
1180efbc368dSDavid S. Miller 	struct rtable *rt = (struct rtable *) dst;
1181efbc368dSDavid S. Miller 
1182ceb33206SDavid S. Miller 	/* All IPV4 dsts are created with ->obsolete set to the value
1183ceb33206SDavid S. Miller 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1184ceb33206SDavid S. Miller 	 * into this function always.
1185ceb33206SDavid S. Miller 	 *
1186387aa65aSTimo Teräs 	 * When a PMTU/redirect information update invalidates a route,
1187387aa65aSTimo Teräs 	 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
118802afc7adSJulian Wiedmann 	 * DST_OBSOLETE_DEAD.
1189ceb33206SDavid S. Miller 	 */
1190387aa65aSTimo Teräs 	if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
1191efbc368dSDavid S. Miller 		return NULL;
1192d11a4dc1STimo Teräs 	return dst;
11931da177e4SLinus Torvalds }
11941da177e4SLinus Torvalds 
119520ff83f1SEric Dumazet static void ipv4_send_dest_unreach(struct sk_buff *skb)
11961da177e4SLinus Torvalds {
1197ed0de45aSStephen Suryaputra 	struct ip_options opt;
1198c543cb4aSEric Dumazet 	int res;
11991da177e4SLinus Torvalds 
1200ed0de45aSStephen Suryaputra 	/* Recompile ip options since IPCB may not be valid anymore.
120120ff83f1SEric Dumazet 	 * Also check we have a reasonable ipv4 header.
1202ed0de45aSStephen Suryaputra 	 */
120320ff83f1SEric Dumazet 	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
120420ff83f1SEric Dumazet 	    ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
120520ff83f1SEric Dumazet 		return;
120620ff83f1SEric Dumazet 
1207ed0de45aSStephen Suryaputra 	memset(&opt, 0, sizeof(opt));
120820ff83f1SEric Dumazet 	if (ip_hdr(skb)->ihl > 5) {
120920ff83f1SEric Dumazet 		if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
121020ff83f1SEric Dumazet 			return;
1211ed0de45aSStephen Suryaputra 		opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
1212c543cb4aSEric Dumazet 
1213c543cb4aSEric Dumazet 		rcu_read_lock();
1214c543cb4aSEric Dumazet 		res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
1215c543cb4aSEric Dumazet 		rcu_read_unlock();
1216c543cb4aSEric Dumazet 
1217c543cb4aSEric Dumazet 		if (res)
1218ed0de45aSStephen Suryaputra 			return;
121920ff83f1SEric Dumazet 	}
1220ed0de45aSStephen Suryaputra 	__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
122120ff83f1SEric Dumazet }
122220ff83f1SEric Dumazet 
122320ff83f1SEric Dumazet static void ipv4_link_failure(struct sk_buff *skb)
122420ff83f1SEric Dumazet {
122520ff83f1SEric Dumazet 	struct rtable *rt;
122620ff83f1SEric Dumazet 
122720ff83f1SEric Dumazet 	ipv4_send_dest_unreach(skb);
12281da177e4SLinus Torvalds 
1229511c3f92SEric Dumazet 	rt = skb_rtable(skb);
12305943634fSDavid S. Miller 	if (rt)
12315943634fSDavid S. Miller 		dst_set_expires(&rt->dst, 0);
12322c8cec5cSDavid S. Miller }
12331da177e4SLinus Torvalds 
1234ede2059dSEric W. Biederman static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
12351da177e4SLinus Torvalds {
123691df42beSJoe Perches 	pr_debug("%s: %pI4 -> %pI4, %s\n",
123791df42beSJoe Perches 		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
12381da177e4SLinus Torvalds 		 skb->dev ? skb->dev->name : "?");
12391da177e4SLinus Torvalds 	kfree_skb(skb);
1240c378a9c0SDave Jones 	WARN_ON(1);
12411da177e4SLinus Torvalds 	return 0;
12421da177e4SLinus Torvalds }
12431da177e4SLinus Torvalds 
12441da177e4SLinus Torvalds /*
12451da177e4SLinus Torvalds    We do not cache source address of outgoing interface,
12461da177e4SLinus Torvalds    because it is used only by IP RR, TS and SRR options,
12471da177e4SLinus Torvalds    so that it out of fast path.
12481da177e4SLinus Torvalds 
12491da177e4SLinus Torvalds    BTW remember: "addr" is allowed to be not aligned
12501da177e4SLinus Torvalds    in IP options!
12511da177e4SLinus Torvalds  */
12521da177e4SLinus Torvalds 
12538e36360aSDavid S. Miller void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
12541da177e4SLinus Torvalds {
1255a61ced5dSAl Viro 	__be32 src;
12561da177e4SLinus Torvalds 
1257c7537967SDavid S. Miller 	if (rt_is_output_route(rt))
1258c5be24ffSDavid S. Miller 		src = ip_hdr(skb)->saddr;
1259ebc0ffaeSEric Dumazet 	else {
12608e36360aSDavid S. Miller 		struct fib_result res;
1261e351bb62SMaciej Żenczykowski 		struct iphdr *iph = ip_hdr(skb);
1262e351bb62SMaciej Żenczykowski 		struct flowi4 fl4 = {
1263e351bb62SMaciej Żenczykowski 			.daddr = iph->daddr,
1264e351bb62SMaciej Żenczykowski 			.saddr = iph->saddr,
1265e351bb62SMaciej Żenczykowski 			.flowi4_tos = RT_TOS(iph->tos),
1266e351bb62SMaciej Żenczykowski 			.flowi4_oif = rt->dst.dev->ifindex,
1267e351bb62SMaciej Żenczykowski 			.flowi4_iif = skb->dev->ifindex,
1268e351bb62SMaciej Żenczykowski 			.flowi4_mark = skb->mark,
1269e351bb62SMaciej Żenczykowski 		};
12705e2b61f7SDavid S. Miller 
1271ebc0ffaeSEric Dumazet 		rcu_read_lock();
12720eeb075fSAndy Gospodarek 		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
1273eba618abSDavid Ahern 			src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
1274ebc0ffaeSEric Dumazet 		else
1275f8126f1dSDavid S. Miller 			src = inet_select_addr(rt->dst.dev,
1276f8126f1dSDavid S. Miller 					       rt_nexthop(rt, iph->daddr),
12771da177e4SLinus Torvalds 					       RT_SCOPE_UNIVERSE);
1278ebc0ffaeSEric Dumazet 		rcu_read_unlock();
1279ebc0ffaeSEric Dumazet 	}
12801da177e4SLinus Torvalds 	memcpy(addr, &src, 4);
12811da177e4SLinus Torvalds }
12821da177e4SLinus Torvalds 
1283c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
12841da177e4SLinus Torvalds static void set_class_tag(struct rtable *rt, u32 tag)
12851da177e4SLinus Torvalds {
1286d8d1f30bSChangli Gao 	if (!(rt->dst.tclassid & 0xFFFF))
1287d8d1f30bSChangli Gao 		rt->dst.tclassid |= tag & 0xFFFF;
1288d8d1f30bSChangli Gao 	if (!(rt->dst.tclassid & 0xFFFF0000))
1289d8d1f30bSChangli Gao 		rt->dst.tclassid |= tag & 0xFFFF0000;
12901da177e4SLinus Torvalds }
12911da177e4SLinus Torvalds #endif
12921da177e4SLinus Torvalds 
12930dbaee3bSDavid S. Miller static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
12940dbaee3bSDavid S. Miller {
12957ed14d97SGao Feng 	unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
1296164a5e7aSEric Dumazet 	unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
12970dbaee3bSDavid S. Miller 				    ip_rt_min_advmss);
12987ed14d97SGao Feng 
12997ed14d97SGao Feng 	return min(advmss, IPV4_MAX_PMTU - header_size);
13000dbaee3bSDavid S. Miller }
13010dbaee3bSDavid S. Miller 
1302ebb762f2SSteffen Klassert static unsigned int ipv4_mtu(const struct dst_entry *dst)
1303d33e4553SDavid S. Miller {
1304261663b0SSteffen Klassert 	const struct rtable *rt = (const struct rtable *) dst;
13055943634fSDavid S. Miller 	unsigned int mtu = rt->rt_pmtu;
13065943634fSDavid S. Miller 
130798d75c37SAlexander Duyck 	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
13085943634fSDavid S. Miller 		mtu = dst_metric_raw(dst, RTAX_MTU);
1309618f9bc7SSteffen Klassert 
131038d523e2SSteffen Klassert 	if (mtu)
1311618f9bc7SSteffen Klassert 		return mtu;
1312618f9bc7SSteffen Klassert 
1313c780a049SEric Dumazet 	mtu = READ_ONCE(dst->dev->mtu);
1314d33e4553SDavid S. Miller 
1315d52e5a7eSSabrina Dubroca 	if (unlikely(ip_mtu_locked(dst))) {
13161550c171SDavid Ahern 		if (rt->rt_gw_family && mtu > 576)
1317d33e4553SDavid S. Miller 			mtu = 576;
1318d33e4553SDavid S. Miller 	}
1319d33e4553SDavid S. Miller 
132014972cbdSRoopa Prabhu 	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
132114972cbdSRoopa Prabhu 
132214972cbdSRoopa Prabhu 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1323d33e4553SDavid S. Miller }
1324d33e4553SDavid S. Miller 
1325a5995e71SDavid Ahern static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
132694720e3aSJulian Anastasov {
132794720e3aSJulian Anastasov 	struct fnhe_hash_bucket *hash;
132894720e3aSJulian Anastasov 	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
132994720e3aSJulian Anastasov 	u32 hval = fnhe_hashfun(daddr);
133094720e3aSJulian Anastasov 
133194720e3aSJulian Anastasov 	spin_lock_bh(&fnhe_lock);
133294720e3aSJulian Anastasov 
1333a5995e71SDavid Ahern 	hash = rcu_dereference_protected(nhc->nhc_exceptions,
133494720e3aSJulian Anastasov 					 lockdep_is_held(&fnhe_lock));
133594720e3aSJulian Anastasov 	hash += hval;
133694720e3aSJulian Anastasov 
133794720e3aSJulian Anastasov 	fnhe_p = &hash->chain;
133894720e3aSJulian Anastasov 	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
133994720e3aSJulian Anastasov 	while (fnhe) {
134094720e3aSJulian Anastasov 		if (fnhe->fnhe_daddr == daddr) {
134194720e3aSJulian Anastasov 			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
134294720e3aSJulian Anastasov 				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
1343ee60ad21SXin Long 			/* set fnhe_daddr to 0 to ensure it won't bind with
1344ee60ad21SXin Long 			 * new dsts in rt_bind_exception().
1345ee60ad21SXin Long 			 */
1346ee60ad21SXin Long 			fnhe->fnhe_daddr = 0;
134794720e3aSJulian Anastasov 			fnhe_flush_routes(fnhe);
134894720e3aSJulian Anastasov 			kfree_rcu(fnhe, rcu);
134994720e3aSJulian Anastasov 			break;
135094720e3aSJulian Anastasov 		}
135194720e3aSJulian Anastasov 		fnhe_p = &fnhe->fnhe_next;
135294720e3aSJulian Anastasov 		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
135394720e3aSJulian Anastasov 						 lockdep_is_held(&fnhe_lock));
135494720e3aSJulian Anastasov 	}
135594720e3aSJulian Anastasov 
135694720e3aSJulian Anastasov 	spin_unlock_bh(&fnhe_lock);
135794720e3aSJulian Anastasov }
135894720e3aSJulian Anastasov 
1359a5995e71SDavid Ahern static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
1360a5995e71SDavid Ahern 					       __be32 daddr)
13614895c771SDavid S. Miller {
1362a5995e71SDavid Ahern 	struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
13634895c771SDavid S. Miller 	struct fib_nh_exception *fnhe;
13644895c771SDavid S. Miller 	u32 hval;
13654895c771SDavid S. Miller 
1366f2bb4bedSDavid S. Miller 	if (!hash)
1367f2bb4bedSDavid S. Miller 		return NULL;
1368f2bb4bedSDavid S. Miller 
1369d3a25c98SDavid S. Miller 	hval = fnhe_hashfun(daddr);
13704895c771SDavid S. Miller 
13714895c771SDavid S. Miller 	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
13724895c771SDavid S. Miller 	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
137394720e3aSJulian Anastasov 		if (fnhe->fnhe_daddr == daddr) {
137494720e3aSJulian Anastasov 			if (fnhe->fnhe_expires &&
137594720e3aSJulian Anastasov 			    time_after(jiffies, fnhe->fnhe_expires)) {
1376a5995e71SDavid Ahern 				ip_del_fnhe(nhc, daddr);
137794720e3aSJulian Anastasov 				break;
137894720e3aSJulian Anastasov 			}
1379f2bb4bedSDavid S. Miller 			return fnhe;
1380f2bb4bedSDavid S. Miller 		}
138194720e3aSJulian Anastasov 	}
1382f2bb4bedSDavid S. Miller 	return NULL;
1383f2bb4bedSDavid S. Miller }
1384f2bb4bedSDavid S. Miller 
138550d889b1SDavid Ahern /* MTU selection:
138650d889b1SDavid Ahern  * 1. mtu on route is locked - use it
138750d889b1SDavid Ahern  * 2. mtu from nexthop exception
138850d889b1SDavid Ahern  * 3. mtu from egress device
138950d889b1SDavid Ahern  */
139050d889b1SDavid Ahern 
139150d889b1SDavid Ahern u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
139250d889b1SDavid Ahern {
1393eba618abSDavid Ahern 	struct fib_nh_common *nhc = res->nhc;
1394eba618abSDavid Ahern 	struct net_device *dev = nhc->nhc_dev;
139550d889b1SDavid Ahern 	struct fib_info *fi = res->fi;
139650d889b1SDavid Ahern 	u32 mtu = 0;
139750d889b1SDavid Ahern 
139850d889b1SDavid Ahern 	if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
139950d889b1SDavid Ahern 	    fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
140050d889b1SDavid Ahern 		mtu = fi->fib_mtu;
140150d889b1SDavid Ahern 
140250d889b1SDavid Ahern 	if (likely(!mtu)) {
140350d889b1SDavid Ahern 		struct fib_nh_exception *fnhe;
140450d889b1SDavid Ahern 
1405a5995e71SDavid Ahern 		fnhe = find_exception(nhc, daddr);
140650d889b1SDavid Ahern 		if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
140750d889b1SDavid Ahern 			mtu = fnhe->fnhe_pmtu;
140850d889b1SDavid Ahern 	}
140950d889b1SDavid Ahern 
141050d889b1SDavid Ahern 	if (likely(!mtu))
141150d889b1SDavid Ahern 		mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
141250d889b1SDavid Ahern 
1413eba618abSDavid Ahern 	return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
141450d889b1SDavid Ahern }
141550d889b1SDavid Ahern 
1416caacf05eSDavid S. Miller static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1417a4c2fd7fSWei Wang 			      __be32 daddr, const bool do_cache)
1418f2bb4bedSDavid S. Miller {
1419caacf05eSDavid S. Miller 	bool ret = false;
1420caacf05eSDavid S. Miller 
1421c5038a83SDavid S. Miller 	spin_lock_bh(&fnhe_lock);
1422aee06da6SJulian Anastasov 
1423c5038a83SDavid S. Miller 	if (daddr == fnhe->fnhe_daddr) {
14242ffae99dSTimo Teräs 		struct rtable __rcu **porig;
14252ffae99dSTimo Teräs 		struct rtable *orig;
14265aad1de5STimo Teräs 		int genid = fnhe_genid(dev_net(rt->dst.dev));
14272ffae99dSTimo Teräs 
14282ffae99dSTimo Teräs 		if (rt_is_input_route(rt))
14292ffae99dSTimo Teräs 			porig = &fnhe->fnhe_rth_input;
14302ffae99dSTimo Teräs 		else
14312ffae99dSTimo Teräs 			porig = &fnhe->fnhe_rth_output;
14322ffae99dSTimo Teräs 		orig = rcu_dereference(*porig);
14335aad1de5STimo Teräs 
14345aad1de5STimo Teräs 		if (fnhe->fnhe_genid != genid) {
14355aad1de5STimo Teräs 			fnhe->fnhe_genid = genid;
143613d82bf5SSteffen Klassert 			fnhe->fnhe_gw = 0;
143713d82bf5SSteffen Klassert 			fnhe->fnhe_pmtu = 0;
143813d82bf5SSteffen Klassert 			fnhe->fnhe_expires = 0;
14390e8411e4SHangbin Liu 			fnhe->fnhe_mtu_locked = false;
14402ffae99dSTimo Teräs 			fnhe_flush_routes(fnhe);
14412ffae99dSTimo Teräs 			orig = NULL;
144213d82bf5SSteffen Klassert 		}
1443387aa65aSTimo Teräs 		fill_route_from_fnhe(rt, fnhe);
14441550c171SDavid Ahern 		if (!rt->rt_gw4) {
14451550c171SDavid Ahern 			rt->rt_gw4 = daddr;
14461550c171SDavid Ahern 			rt->rt_gw_family = AF_INET;
14471550c171SDavid Ahern 		}
1448f2bb4bedSDavid S. Miller 
1449a4c2fd7fSWei Wang 		if (do_cache) {
14500830106cSWei Wang 			dst_hold(&rt->dst);
14512ffae99dSTimo Teräs 			rcu_assign_pointer(*porig, rt);
14520830106cSWei Wang 			if (orig) {
145395c47f9cSWei Wang 				dst_dev_put(&orig->dst);
14540830106cSWei Wang 				dst_release(&orig->dst);
14550830106cSWei Wang 			}
14562ffae99dSTimo Teräs 			ret = true;
14572ffae99dSTimo Teräs 		}
1458c5038a83SDavid S. Miller 
1459c5038a83SDavid S. Miller 		fnhe->fnhe_stamp = jiffies;
1460c5038a83SDavid S. Miller 	}
1461c5038a83SDavid S. Miller 	spin_unlock_bh(&fnhe_lock);
1462caacf05eSDavid S. Miller 
1463caacf05eSDavid S. Miller 	return ret;
146454764bb6SEric Dumazet }
146554764bb6SEric Dumazet 
146687063a1fSDavid Ahern static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
1467f2bb4bedSDavid S. Miller {
1468d26b3a7cSEric Dumazet 	struct rtable *orig, *prev, **p;
1469caacf05eSDavid S. Miller 	bool ret = true;
1470f2bb4bedSDavid S. Miller 
1471d26b3a7cSEric Dumazet 	if (rt_is_input_route(rt)) {
14720f457a36SDavid Ahern 		p = (struct rtable **)&nhc->nhc_rth_input;
1473d26b3a7cSEric Dumazet 	} else {
14740f457a36SDavid Ahern 		p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
1475d26b3a7cSEric Dumazet 	}
1476f2bb4bedSDavid S. Miller 	orig = *p;
1477f2bb4bedSDavid S. Miller 
14780830106cSWei Wang 	/* hold dst before doing cmpxchg() to avoid race condition
14790830106cSWei Wang 	 * on this dst
14800830106cSWei Wang 	 */
14810830106cSWei Wang 	dst_hold(&rt->dst);
1482f2bb4bedSDavid S. Miller 	prev = cmpxchg(p, orig, rt);
1483f2bb4bedSDavid S. Miller 	if (prev == orig) {
14840830106cSWei Wang 		if (orig) {
148595c47f9cSWei Wang 			dst_dev_put(&orig->dst);
14860830106cSWei Wang 			dst_release(&orig->dst);
14870830106cSWei Wang 		}
14880830106cSWei Wang 	} else {
14890830106cSWei Wang 		dst_release(&rt->dst);
1490caacf05eSDavid S. Miller 		ret = false;
14910830106cSWei Wang 	}
1492caacf05eSDavid S. Miller 
1493caacf05eSDavid S. Miller 	return ret;
1494caacf05eSDavid S. Miller }
1495caacf05eSDavid S. Miller 
14965055c371SEric Dumazet struct uncached_list {
14975055c371SEric Dumazet 	spinlock_t		lock;
14985055c371SEric Dumazet 	struct list_head	head;
14995055c371SEric Dumazet };
15005055c371SEric Dumazet 
15015055c371SEric Dumazet static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
1502caacf05eSDavid S. Miller 
1503510c321bSXin Long void rt_add_uncached_list(struct rtable *rt)
1504caacf05eSDavid S. Miller {
15055055c371SEric Dumazet 	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
15065055c371SEric Dumazet 
15075055c371SEric Dumazet 	rt->rt_uncached_list = ul;
15085055c371SEric Dumazet 
15095055c371SEric Dumazet 	spin_lock_bh(&ul->lock);
15105055c371SEric Dumazet 	list_add_tail(&rt->rt_uncached, &ul->head);
15115055c371SEric Dumazet 	spin_unlock_bh(&ul->lock);
1512caacf05eSDavid S. Miller }
1513caacf05eSDavid S. Miller 
1514510c321bSXin Long void rt_del_uncached_list(struct rtable *rt)
1515510c321bSXin Long {
1516510c321bSXin Long 	if (!list_empty(&rt->rt_uncached)) {
1517510c321bSXin Long 		struct uncached_list *ul = rt->rt_uncached_list;
1518510c321bSXin Long 
1519510c321bSXin Long 		spin_lock_bh(&ul->lock);
1520510c321bSXin Long 		list_del(&rt->rt_uncached);
1521510c321bSXin Long 		spin_unlock_bh(&ul->lock);
1522510c321bSXin Long 	}
1523510c321bSXin Long }
1524510c321bSXin Long 
/*
 * Destroy hook for IPv4 dsts: drop the metrics reference, then take the
 * route off its uncached list (if it is on one).
 */
static void ipv4_dst_destroy(struct dst_entry *dst)
{
	ip_dst_metrics_put(dst);
	rt_del_uncached_list((struct rtable *)dst);
}
1532caacf05eSDavid S. Miller 
1533caacf05eSDavid S. Miller void rt_flush_dev(struct net_device *dev)
1534caacf05eSDavid S. Miller {
1535caacf05eSDavid S. Miller 	struct net *net = dev_net(dev);
1536caacf05eSDavid S. Miller 	struct rtable *rt;
15375055c371SEric Dumazet 	int cpu;
1538caacf05eSDavid S. Miller 
15395055c371SEric Dumazet 	for_each_possible_cpu(cpu) {
15405055c371SEric Dumazet 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
15415055c371SEric Dumazet 
15425055c371SEric Dumazet 		spin_lock_bh(&ul->lock);
15435055c371SEric Dumazet 		list_for_each_entry(rt, &ul->head, rt_uncached) {
1544caacf05eSDavid S. Miller 			if (rt->dst.dev != dev)
1545caacf05eSDavid S. Miller 				continue;
1546caacf05eSDavid S. Miller 			rt->dst.dev = net->loopback_dev;
1547caacf05eSDavid S. Miller 			dev_hold(rt->dst.dev);
1548caacf05eSDavid S. Miller 			dev_put(dev);
1549caacf05eSDavid S. Miller 		}
15505055c371SEric Dumazet 		spin_unlock_bh(&ul->lock);
15514895c771SDavid S. Miller 	}
15524895c771SDavid S. Miller }
15534895c771SDavid S. Miller 
15544331debcSEric Dumazet static bool rt_cache_valid(const struct rtable *rt)
1555d2d68ba9SDavid S. Miller {
15564331debcSEric Dumazet 	return	rt &&
15574331debcSEric Dumazet 		rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
15584331debcSEric Dumazet 		!rt_is_expired(rt);
1559d2d68ba9SDavid S. Miller }
1560d2d68ba9SDavid S. Miller 
1561f2bb4bedSDavid S. Miller static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
15625e2b61f7SDavid S. Miller 			   const struct fib_result *res,
1563f2bb4bedSDavid S. Miller 			   struct fib_nh_exception *fnhe,
1564a4c2fd7fSWei Wang 			   struct fib_info *fi, u16 type, u32 itag,
1565a4c2fd7fSWei Wang 			   const bool do_cache)
15661da177e4SLinus Torvalds {
1567caacf05eSDavid S. Miller 	bool cached = false;
1568caacf05eSDavid S. Miller 
15691da177e4SLinus Torvalds 	if (fi) {
1570eba618abSDavid Ahern 		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
15714895c771SDavid S. Miller 
15720f5f7d7bSDavid Ahern 		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
15730f5f7d7bSDavid Ahern 			rt->rt_gw_family = nhc->nhc_gw_family;
15740f5f7d7bSDavid Ahern 			/* only INET and INET6 are supported */
15750f5f7d7bSDavid Ahern 			if (likely(nhc->nhc_gw_family == AF_INET))
15760f5f7d7bSDavid Ahern 				rt->rt_gw4 = nhc->nhc_gw.ipv4;
15770f5f7d7bSDavid Ahern 			else
15780f5f7d7bSDavid Ahern 				rt->rt_gw6 = nhc->nhc_gw.ipv6;
1579155e8336SJulian Anastasov 		}
15800f5f7d7bSDavid Ahern 
1581e1255ed4SDavid Ahern 		ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
1582e1255ed4SDavid Ahern 
1583c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
1584dcb1ecb5SDavid Ahern 		if (nhc->nhc_family == AF_INET) {
158587063a1fSDavid Ahern 			struct fib_nh *nh;
158687063a1fSDavid Ahern 
158787063a1fSDavid Ahern 			nh = container_of(nhc, struct fib_nh, nh_common);
1588f2bb4bedSDavid S. Miller 			rt->dst.tclassid = nh->nh_tclassid;
158987063a1fSDavid Ahern 		}
15901da177e4SLinus Torvalds #endif
159187063a1fSDavid Ahern 		rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
1592c5038a83SDavid S. Miller 		if (unlikely(fnhe))
1593a4c2fd7fSWei Wang 			cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
1594a4c2fd7fSWei Wang 		else if (do_cache)
159587063a1fSDavid Ahern 			cached = rt_cache_route(nhc, rt);
1596155e8336SJulian Anastasov 		if (unlikely(!cached)) {
1597155e8336SJulian Anastasov 			/* Routes we intend to cache in nexthop exception or
1598155e8336SJulian Anastasov 			 * FIB nexthop have the DST_NOCACHE bit clear.
1599155e8336SJulian Anastasov 			 * However, if we are unsuccessful at storing this
1600155e8336SJulian Anastasov 			 * route into the cache we really need to set it.
1601155e8336SJulian Anastasov 			 */
16021550c171SDavid Ahern 			if (!rt->rt_gw4) {
16031550c171SDavid Ahern 				rt->rt_gw_family = AF_INET;
16041550c171SDavid Ahern 				rt->rt_gw4 = daddr;
16051550c171SDavid Ahern 			}
1606155e8336SJulian Anastasov 			rt_add_uncached_list(rt);
1607d33e4553SDavid S. Miller 		}
1608155e8336SJulian Anastasov 	} else
1609caacf05eSDavid S. Miller 		rt_add_uncached_list(rt);
16101da177e4SLinus Torvalds 
1611c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
16121da177e4SLinus Torvalds #ifdef CONFIG_IP_MULTIPLE_TABLES
161385b91b03SDavid S. Miller 	set_class_tag(rt, res->tclassid);
16141da177e4SLinus Torvalds #endif
16151da177e4SLinus Torvalds 	set_class_tag(rt, itag);
16161da177e4SLinus Torvalds #endif
16171da177e4SLinus Torvalds }
16181da177e4SLinus Torvalds 
16199ab179d8SDavid Ahern struct rtable *rt_dst_alloc(struct net_device *dev,
1620d08c4f35SDavid Ahern 			    unsigned int flags, u16 type,
1621f2bb4bedSDavid S. Miller 			    bool nopolicy, bool noxfrm, bool will_cache)
16220c4dcd58SDavid S. Miller {
1623d08c4f35SDavid Ahern 	struct rtable *rt;
1624d08c4f35SDavid Ahern 
1625d08c4f35SDavid Ahern 	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1626a4c2fd7fSWei Wang 		       (will_cache ? 0 : DST_HOST) |
16270c4dcd58SDavid S. Miller 		       (nopolicy ? DST_NOPOLICY : 0) |
1628b2a9c0edSWei Wang 		       (noxfrm ? DST_NOXFRM : 0));
1629d08c4f35SDavid Ahern 
1630d08c4f35SDavid Ahern 	if (rt) {
1631d08c4f35SDavid Ahern 		rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1632d08c4f35SDavid Ahern 		rt->rt_flags = flags;
1633d08c4f35SDavid Ahern 		rt->rt_type = type;
1634d08c4f35SDavid Ahern 		rt->rt_is_input = 0;
1635d08c4f35SDavid Ahern 		rt->rt_iif = 0;
1636d08c4f35SDavid Ahern 		rt->rt_pmtu = 0;
1637d52e5a7eSSabrina Dubroca 		rt->rt_mtu_locked = 0;
16381550c171SDavid Ahern 		rt->rt_gw_family = 0;
16391550c171SDavid Ahern 		rt->rt_gw4 = 0;
1640d08c4f35SDavid Ahern 		INIT_LIST_HEAD(&rt->rt_uncached);
1641d08c4f35SDavid Ahern 
1642d08c4f35SDavid Ahern 		rt->dst.output = ip_output;
1643d08c4f35SDavid Ahern 		if (flags & RTCF_LOCAL)
1644d08c4f35SDavid Ahern 			rt->dst.input = ip_local_deliver;
1645d08c4f35SDavid Ahern 	}
1646d08c4f35SDavid Ahern 
1647d08c4f35SDavid Ahern 	return rt;
16480c4dcd58SDavid S. Miller }
16499ab179d8SDavid Ahern EXPORT_SYMBOL(rt_dst_alloc);
16500c4dcd58SDavid S. Miller 
165196d36220SEric Dumazet /* called in rcu_read_lock() section */
1652bc044e8dSPaolo Abeni int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1653bc044e8dSPaolo Abeni 			  u8 tos, struct net_device *dev,
1654bc044e8dSPaolo Abeni 			  struct in_device *in_dev, u32 *itag)
16551da177e4SLinus Torvalds {
1656b5f7e755SEric Dumazet 	int err;
16571da177e4SLinus Torvalds 
16581da177e4SLinus Torvalds 	/* Primary sanity checks. */
165951456b29SIan Morris 	if (!in_dev)
16601da177e4SLinus Torvalds 		return -EINVAL;
16611da177e4SLinus Torvalds 
16621e637c74SJan Engelhardt 	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1663d0daebc3SThomas Graf 	    skb->protocol != htons(ETH_P_IP))
1664bc044e8dSPaolo Abeni 		return -EINVAL;
1665d0daebc3SThomas Graf 
166675fea73dSAlexander Duyck 	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
1667bc044e8dSPaolo Abeni 		return -EINVAL;
16681da177e4SLinus Torvalds 
1669f97c1e0cSJoe Perches 	if (ipv4_is_zeronet(saddr)) {
16701d2f4ebbSEdward Chron 		if (!ipv4_is_local_multicast(daddr) &&
16711d2f4ebbSEdward Chron 		    ip_hdr(skb)->protocol != IPPROTO_IGMP)
1672bc044e8dSPaolo Abeni 			return -EINVAL;
1673b5f7e755SEric Dumazet 	} else {
16749e56e380SDavid S. Miller 		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1675bc044e8dSPaolo Abeni 					  in_dev, itag);
1676b5f7e755SEric Dumazet 		if (err < 0)
1677bc044e8dSPaolo Abeni 			return err;
1678b5f7e755SEric Dumazet 	}
1679bc044e8dSPaolo Abeni 	return 0;
1680bc044e8dSPaolo Abeni }
1681bc044e8dSPaolo Abeni 
1682bc044e8dSPaolo Abeni /* called in rcu_read_lock() section */
1683bc044e8dSPaolo Abeni static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1684bc044e8dSPaolo Abeni 			     u8 tos, struct net_device *dev, int our)
1685bc044e8dSPaolo Abeni {
1686bc044e8dSPaolo Abeni 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1687bc044e8dSPaolo Abeni 	unsigned int flags = RTCF_MULTICAST;
1688bc044e8dSPaolo Abeni 	struct rtable *rth;
1689bc044e8dSPaolo Abeni 	u32 itag = 0;
1690bc044e8dSPaolo Abeni 	int err;
1691bc044e8dSPaolo Abeni 
1692bc044e8dSPaolo Abeni 	err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
1693bc044e8dSPaolo Abeni 	if (err)
1694bc044e8dSPaolo Abeni 		return err;
1695bc044e8dSPaolo Abeni 
1696d08c4f35SDavid Ahern 	if (our)
1697d08c4f35SDavid Ahern 		flags |= RTCF_LOCAL;
1698d08c4f35SDavid Ahern 
1699d08c4f35SDavid Ahern 	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
1700f2bb4bedSDavid S. Miller 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
17011da177e4SLinus Torvalds 	if (!rth)
1702bc044e8dSPaolo Abeni 		return -ENOBUFS;
17031da177e4SLinus Torvalds 
1704c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
1705d8d1f30bSChangli Gao 	rth->dst.tclassid = itag;
17061da177e4SLinus Torvalds #endif
1707cf911662SDavid S. Miller 	rth->dst.output = ip_rt_bug;
17089917e1e8SDavid S. Miller 	rth->rt_is_input= 1;
17091da177e4SLinus Torvalds 
17101da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE
1711f97c1e0cSJoe Perches 	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
1712d8d1f30bSChangli Gao 		rth->dst.input = ip_mr_input;
17131da177e4SLinus Torvalds #endif
17141da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(in_slow_mc);
17151da177e4SLinus Torvalds 
171689aef892SDavid S. Miller 	skb_dst_set(skb, &rth->dst);
171789aef892SDavid S. Miller 	return 0;
17181da177e4SLinus Torvalds }
17191da177e4SLinus Torvalds 
17201da177e4SLinus Torvalds 
17211da177e4SLinus Torvalds static void ip_handle_martian_source(struct net_device *dev,
17221da177e4SLinus Torvalds 				     struct in_device *in_dev,
17231da177e4SLinus Torvalds 				     struct sk_buff *skb,
17249e12bb22SAl Viro 				     __be32 daddr,
17259e12bb22SAl Viro 				     __be32 saddr)
17261da177e4SLinus Torvalds {
17271da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(in_martian_src);
17281da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE
17291da177e4SLinus Torvalds 	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
17301da177e4SLinus Torvalds 		/*
17311da177e4SLinus Torvalds 		 *	RFC1812 recommendation, if source is martian,
17321da177e4SLinus Torvalds 		 *	the only hint is MAC header.
17331da177e4SLinus Torvalds 		 */
1734058bd4d2SJoe Perches 		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
1735673d57e7SHarvey Harrison 			&daddr, &saddr, dev->name);
173698e399f8SArnaldo Carvalho de Melo 		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
1737058bd4d2SJoe Perches 			print_hex_dump(KERN_WARNING, "ll header: ",
1738058bd4d2SJoe Perches 				       DUMP_PREFIX_OFFSET, 16, 1,
1739058bd4d2SJoe Perches 				       skb_mac_header(skb),
1740b2c85100SDavid S. Miller 				       dev->hard_header_len, false);
17411da177e4SLinus Torvalds 		}
17421da177e4SLinus Torvalds 	}
17431da177e4SLinus Torvalds #endif
17441da177e4SLinus Torvalds }
17451da177e4SLinus Torvalds 
174647360228SEric Dumazet /* called in rcu_read_lock() section */
17475969f71dSStephen Hemminger static int __mkroute_input(struct sk_buff *skb,
1748982721f3SDavid S. Miller 			   const struct fib_result *res,
17491da177e4SLinus Torvalds 			   struct in_device *in_dev,
1750c6cffba4SDavid S. Miller 			   __be32 daddr, __be32 saddr, u32 tos)
17511da177e4SLinus Torvalds {
1752eba618abSDavid Ahern 	struct fib_nh_common *nhc = FIB_RES_NHC(*res);
1753eba618abSDavid Ahern 	struct net_device *dev = nhc->nhc_dev;
17542ffae99dSTimo Teräs 	struct fib_nh_exception *fnhe;
17551da177e4SLinus Torvalds 	struct rtable *rth;
17561da177e4SLinus Torvalds 	int err;
17571da177e4SLinus Torvalds 	struct in_device *out_dev;
1758d2d68ba9SDavid S. Miller 	bool do_cache;
1759fbdc0ad0SLi RongQing 	u32 itag = 0;
17601da177e4SLinus Torvalds 
17611da177e4SLinus Torvalds 	/* get a working reference to the output device */
1762eba618abSDavid Ahern 	out_dev = __in_dev_get_rcu(dev);
176351456b29SIan Morris 	if (!out_dev) {
1764e87cc472SJoe Perches 		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
17651da177e4SLinus Torvalds 		return -EINVAL;
17661da177e4SLinus Torvalds 	}
17671da177e4SLinus Torvalds 
17685c04c819SMichael Smith 	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
17699e56e380SDavid S. Miller 				  in_dev->dev, in_dev, &itag);
17701da177e4SLinus Torvalds 	if (err < 0) {
17711da177e4SLinus Torvalds 		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
17721da177e4SLinus Torvalds 					 saddr);
17731da177e4SLinus Torvalds 
17741da177e4SLinus Torvalds 		goto cleanup;
17751da177e4SLinus Torvalds 	}
17761da177e4SLinus Torvalds 
1777e81da0e1SJulian Anastasov 	do_cache = res->fi && !itag;
1778e81da0e1SJulian Anastasov 	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
1779eba618abSDavid Ahern 	    skb->protocol == htons(ETH_P_IP)) {
1780bdf00467SDavid Ahern 		__be32 gw;
1781eba618abSDavid Ahern 
1782bdf00467SDavid Ahern 		gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
1783eba618abSDavid Ahern 		if (IN_DEV_SHARED_MEDIA(out_dev) ||
1784eba618abSDavid Ahern 		    inet_addr_onlink(out_dev, saddr, gw))
1785df4d9254SHannes Frederic Sowa 			IPCB(skb)->flags |= IPSKB_DOREDIRECT;
1786eba618abSDavid Ahern 	}
17871da177e4SLinus Torvalds 
17881da177e4SLinus Torvalds 	if (skb->protocol != htons(ETH_P_IP)) {
17891da177e4SLinus Torvalds 		/* Not IP (i.e. ARP). Do not create route, if it is
17901da177e4SLinus Torvalds 		 * invalid for proxy arp. DNAT routes are always valid.
179165324144SJesper Dangaard Brouer 		 *
179265324144SJesper Dangaard Brouer 		 * Proxy arp feature have been extended to allow, ARP
179365324144SJesper Dangaard Brouer 		 * replies back to the same interface, to support
179465324144SJesper Dangaard Brouer 		 * Private VLAN switch technologies. See arp.c.
17951da177e4SLinus Torvalds 		 */
179665324144SJesper Dangaard Brouer 		if (out_dev == in_dev &&
179765324144SJesper Dangaard Brouer 		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
17981da177e4SLinus Torvalds 			err = -EINVAL;
17991da177e4SLinus Torvalds 			goto cleanup;
18001da177e4SLinus Torvalds 		}
18011da177e4SLinus Torvalds 	}
18021da177e4SLinus Torvalds 
1803a5995e71SDavid Ahern 	fnhe = find_exception(nhc, daddr);
1804e81da0e1SJulian Anastasov 	if (do_cache) {
180594720e3aSJulian Anastasov 		if (fnhe)
18062ffae99dSTimo Teräs 			rth = rcu_dereference(fnhe->fnhe_rth_input);
180794720e3aSJulian Anastasov 		else
18080f457a36SDavid Ahern 			rth = rcu_dereference(nhc->nhc_rth_input);
1809d2d68ba9SDavid S. Miller 		if (rt_cache_valid(rth)) {
1810c6cffba4SDavid S. Miller 			skb_dst_set_noref(skb, &rth->dst);
1811d2d68ba9SDavid S. Miller 			goto out;
1812d2d68ba9SDavid S. Miller 		}
1813d2d68ba9SDavid S. Miller 	}
1814f2bb4bedSDavid S. Miller 
1815d08c4f35SDavid Ahern 	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
18165c1e6aa3SDavid S. Miller 			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
1817d2d68ba9SDavid S. Miller 			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
18181da177e4SLinus Torvalds 	if (!rth) {
18191da177e4SLinus Torvalds 		err = -ENOBUFS;
18201da177e4SLinus Torvalds 		goto cleanup;
18211da177e4SLinus Torvalds 	}
18221da177e4SLinus Torvalds 
18239917e1e8SDavid S. Miller 	rth->rt_is_input = 1;
1824a6254864SDuan Jiong 	RT_CACHE_STAT_INC(in_slow_tot);
18251da177e4SLinus Torvalds 
1826d8d1f30bSChangli Gao 	rth->dst.input = ip_forward;
18271da177e4SLinus Torvalds 
1828a4c2fd7fSWei Wang 	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
1829a4c2fd7fSWei Wang 		       do_cache);
18309942895bSDavid Ahern 	lwtunnel_set_redirect(&rth->dst);
1831c6cffba4SDavid S. Miller 	skb_dst_set(skb, &rth->dst);
1832d2d68ba9SDavid S. Miller out:
18331da177e4SLinus Torvalds 	err = 0;
18341da177e4SLinus Torvalds  cleanup:
18351da177e4SLinus Torvalds 	return err;
18361da177e4SLinus Torvalds }
18371da177e4SLinus Torvalds 
183879a13159SPeter Nørlund #ifdef CONFIG_IP_ROUTE_MULTIPATH
183979a13159SPeter Nørlund /* To make ICMP packets follow the right flow, the multipath hash is
1840bf4e0a3dSNikolay Aleksandrov  * calculated from the inner IP addresses.
184179a13159SPeter Nørlund  */
1842bf4e0a3dSNikolay Aleksandrov static void ip_multipath_l3_keys(const struct sk_buff *skb,
1843bf4e0a3dSNikolay Aleksandrov 				 struct flow_keys *hash_keys)
184479a13159SPeter Nørlund {
184579a13159SPeter Nørlund 	const struct iphdr *outer_iph = ip_hdr(skb);
18466f74b6c2SDavid Ahern 	const struct iphdr *key_iph = outer_iph;
1847bf4e0a3dSNikolay Aleksandrov 	const struct iphdr *inner_iph;
184879a13159SPeter Nørlund 	const struct icmphdr *icmph;
184979a13159SPeter Nørlund 	struct iphdr _inner_iph;
1850bf4e0a3dSNikolay Aleksandrov 	struct icmphdr _icmph;
1851bf4e0a3dSNikolay Aleksandrov 
1852bf4e0a3dSNikolay Aleksandrov 	if (likely(outer_iph->protocol != IPPROTO_ICMP))
18536f74b6c2SDavid Ahern 		goto out;
185479a13159SPeter Nørlund 
185579a13159SPeter Nørlund 	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
18566f74b6c2SDavid Ahern 		goto out;
185779a13159SPeter Nørlund 
185879a13159SPeter Nørlund 	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
185979a13159SPeter Nørlund 				   &_icmph);
186079a13159SPeter Nørlund 	if (!icmph)
18616f74b6c2SDavid Ahern 		goto out;
186279a13159SPeter Nørlund 
186379a13159SPeter Nørlund 	if (icmph->type != ICMP_DEST_UNREACH &&
186479a13159SPeter Nørlund 	    icmph->type != ICMP_REDIRECT &&
186579a13159SPeter Nørlund 	    icmph->type != ICMP_TIME_EXCEEDED &&
1866bf4e0a3dSNikolay Aleksandrov 	    icmph->type != ICMP_PARAMETERPROB)
18676f74b6c2SDavid Ahern 		goto out;
186879a13159SPeter Nørlund 
186979a13159SPeter Nørlund 	inner_iph = skb_header_pointer(skb,
187079a13159SPeter Nørlund 				       outer_iph->ihl * 4 + sizeof(_icmph),
187179a13159SPeter Nørlund 				       sizeof(_inner_iph), &_inner_iph);
187279a13159SPeter Nørlund 	if (!inner_iph)
18736f74b6c2SDavid Ahern 		goto out;
18746f74b6c2SDavid Ahern 
18756f74b6c2SDavid Ahern 	key_iph = inner_iph;
18766f74b6c2SDavid Ahern out:
18776f74b6c2SDavid Ahern 	hash_keys->addrs.v4addrs.src = key_iph->saddr;
18786f74b6c2SDavid Ahern 	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
187979a13159SPeter Nørlund }
188079a13159SPeter Nørlund 
1881bf4e0a3dSNikolay Aleksandrov /* if skb is set it will be used and fl4 can be NULL */
18827efc0b6bSDavid Ahern int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
1883e37b1e97SRoopa Prabhu 		       const struct sk_buff *skb, struct flow_keys *flkeys)
1884bf4e0a3dSNikolay Aleksandrov {
18852a8e4997SIdo Schimmel 	u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
1886bf4e0a3dSNikolay Aleksandrov 	struct flow_keys hash_keys;
1887bf4e0a3dSNikolay Aleksandrov 	u32 mhash;
1888bf4e0a3dSNikolay Aleksandrov 
1889bf4e0a3dSNikolay Aleksandrov 	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
1890bf4e0a3dSNikolay Aleksandrov 	case 0:
1891bf4e0a3dSNikolay Aleksandrov 		memset(&hash_keys, 0, sizeof(hash_keys));
1892bf4e0a3dSNikolay Aleksandrov 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1893bf4e0a3dSNikolay Aleksandrov 		if (skb) {
1894bf4e0a3dSNikolay Aleksandrov 			ip_multipath_l3_keys(skb, &hash_keys);
1895bf4e0a3dSNikolay Aleksandrov 		} else {
1896bf4e0a3dSNikolay Aleksandrov 			hash_keys.addrs.v4addrs.src = fl4->saddr;
1897bf4e0a3dSNikolay Aleksandrov 			hash_keys.addrs.v4addrs.dst = fl4->daddr;
1898bf4e0a3dSNikolay Aleksandrov 		}
1899bf4e0a3dSNikolay Aleksandrov 		break;
1900bf4e0a3dSNikolay Aleksandrov 	case 1:
1901bf4e0a3dSNikolay Aleksandrov 		/* skb is currently provided only when forwarding */
1902bf4e0a3dSNikolay Aleksandrov 		if (skb) {
1903bf4e0a3dSNikolay Aleksandrov 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1904bf4e0a3dSNikolay Aleksandrov 			struct flow_keys keys;
1905bf4e0a3dSNikolay Aleksandrov 
1906bf4e0a3dSNikolay Aleksandrov 			/* short-circuit if we already have L4 hash present */
1907bf4e0a3dSNikolay Aleksandrov 			if (skb->l4_hash)
1908bf4e0a3dSNikolay Aleksandrov 				return skb_get_hash_raw(skb) >> 1;
1909ec7127a5SDavid Ahern 
1910bf4e0a3dSNikolay Aleksandrov 			memset(&hash_keys, 0, sizeof(hash_keys));
19111fe4b118SDavid Ahern 
1912ec7127a5SDavid Ahern 			if (!flkeys) {
1913ec7127a5SDavid Ahern 				skb_flow_dissect_flow_keys(skb, &keys, flag);
1914ec7127a5SDavid Ahern 				flkeys = &keys;
1915ec7127a5SDavid Ahern 			}
1916ec7127a5SDavid Ahern 
1917e37b1e97SRoopa Prabhu 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1918e37b1e97SRoopa Prabhu 			hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
1919e37b1e97SRoopa Prabhu 			hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
1920e37b1e97SRoopa Prabhu 			hash_keys.ports.src = flkeys->ports.src;
1921e37b1e97SRoopa Prabhu 			hash_keys.ports.dst = flkeys->ports.dst;
1922e37b1e97SRoopa Prabhu 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
1923e37b1e97SRoopa Prabhu 		} else {
1924bf4e0a3dSNikolay Aleksandrov 			memset(&hash_keys, 0, sizeof(hash_keys));
1925bf4e0a3dSNikolay Aleksandrov 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1926bf4e0a3dSNikolay Aleksandrov 			hash_keys.addrs.v4addrs.src = fl4->saddr;
1927bf4e0a3dSNikolay Aleksandrov 			hash_keys.addrs.v4addrs.dst = fl4->daddr;
1928bf4e0a3dSNikolay Aleksandrov 			hash_keys.ports.src = fl4->fl4_sport;
1929bf4e0a3dSNikolay Aleksandrov 			hash_keys.ports.dst = fl4->fl4_dport;
1930bf4e0a3dSNikolay Aleksandrov 			hash_keys.basic.ip_proto = fl4->flowi4_proto;
1931bf4e0a3dSNikolay Aleksandrov 		}
1932bf4e0a3dSNikolay Aleksandrov 		break;
1933363887a2SStephen Suryaputra 	case 2:
1934363887a2SStephen Suryaputra 		memset(&hash_keys, 0, sizeof(hash_keys));
1935363887a2SStephen Suryaputra 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1936363887a2SStephen Suryaputra 		/* skb is currently provided only when forwarding */
1937363887a2SStephen Suryaputra 		if (skb) {
1938363887a2SStephen Suryaputra 			struct flow_keys keys;
1939363887a2SStephen Suryaputra 
1940363887a2SStephen Suryaputra 			skb_flow_dissect_flow_keys(skb, &keys, 0);
1941363887a2SStephen Suryaputra 
1942363887a2SStephen Suryaputra 			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
1943363887a2SStephen Suryaputra 			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
1944363887a2SStephen Suryaputra 		} else {
1945363887a2SStephen Suryaputra 			/* Same as case 0 */
1946363887a2SStephen Suryaputra 			hash_keys.addrs.v4addrs.src = fl4->saddr;
1947363887a2SStephen Suryaputra 			hash_keys.addrs.v4addrs.dst = fl4->daddr;
1948363887a2SStephen Suryaputra 		}
1949363887a2SStephen Suryaputra 		break;
1950bf4e0a3dSNikolay Aleksandrov 	}
1951bf4e0a3dSNikolay Aleksandrov 	mhash = flow_hash_from_keys(&hash_keys);
1952bf4e0a3dSNikolay Aleksandrov 
195324ba1440Swenxu 	if (multipath_hash)
195424ba1440Swenxu 		mhash = jhash_2words(mhash, multipath_hash, 0);
195524ba1440Swenxu 
1956bf4e0a3dSNikolay Aleksandrov 	return mhash >> 1;
1957bf4e0a3dSNikolay Aleksandrov }
195879a13159SPeter Nørlund #endif /* CONFIG_IP_ROUTE_MULTIPATH */
195979a13159SPeter Nørlund 
19605969f71dSStephen Hemminger static int ip_mkroute_input(struct sk_buff *skb,
19611da177e4SLinus Torvalds 			    struct fib_result *res,
19621da177e4SLinus Torvalds 			    struct in_device *in_dev,
1963e37b1e97SRoopa Prabhu 			    __be32 daddr, __be32 saddr, u32 tos,
1964e37b1e97SRoopa Prabhu 			    struct flow_keys *hkeys)
19651da177e4SLinus Torvalds {
19661da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
19675481d73fSDavid Ahern 	if (res->fi && fib_info_num_path(res->fi) > 1) {
19687efc0b6bSDavid Ahern 		int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
19690e884c78SPeter Nørlund 
19700e884c78SPeter Nørlund 		fib_select_multipath(res, h);
19710e884c78SPeter Nørlund 	}
19721da177e4SLinus Torvalds #endif
19731da177e4SLinus Torvalds 
19741da177e4SLinus Torvalds 	/* create a routing cache entry */
1975c6cffba4SDavid S. Miller 	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
19761da177e4SLinus Torvalds }
19771da177e4SLinus Torvalds 
19781da177e4SLinus Torvalds /*
19791da177e4SLinus Torvalds  *	NOTE. We drop all the packets that has local source
19801da177e4SLinus Torvalds  *	addresses, because every properly looped back packet
19811da177e4SLinus Torvalds  *	must have correct destination already attached by output routine.
19821da177e4SLinus Torvalds  *
19831da177e4SLinus Torvalds  *	Such approach solves two big problems:
19841da177e4SLinus Torvalds  *	1. Not simplex devices are handled properly.
19851da177e4SLinus Torvalds  *	2. IP spoofing attempts are filtered with 100% of guarantee.
1986ebc0ffaeSEric Dumazet  *	called with rcu_read_lock()
19871da177e4SLinus Torvalds  */
19881da177e4SLinus Torvalds 
19899e12bb22SAl Viro static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
19905510cdf7SDavid Ahern 			       u8 tos, struct net_device *dev,
19915510cdf7SDavid Ahern 			       struct fib_result *res)
19921da177e4SLinus Torvalds {
199396d36220SEric Dumazet 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1994e37b1e97SRoopa Prabhu 	struct flow_keys *flkeys = NULL, _flkeys;
1995e37b1e97SRoopa Prabhu 	struct net    *net = dev_net(dev);
19961b7179d3SThomas Graf 	struct ip_tunnel_info *tun_info;
1997e37b1e97SRoopa Prabhu 	int		err = -EINVAL;
199895c96174SEric Dumazet 	unsigned int	flags = 0;
19991da177e4SLinus Torvalds 	u32		itag = 0;
20001da177e4SLinus Torvalds 	struct rtable	*rth;
2001e37b1e97SRoopa Prabhu 	struct flowi4	fl4;
20020a90478bSXin Long 	bool do_cache = true;
20031da177e4SLinus Torvalds 
20041da177e4SLinus Torvalds 	/* IP on this device is disabled. */
20051da177e4SLinus Torvalds 
20061da177e4SLinus Torvalds 	if (!in_dev)
20071da177e4SLinus Torvalds 		goto out;
20081da177e4SLinus Torvalds 
20091da177e4SLinus Torvalds 	/* Check for the most weird martians, which can be not detected
20101da177e4SLinus Torvalds 	   by fib_lookup.
20111da177e4SLinus Torvalds 	 */
20121da177e4SLinus Torvalds 
201361adedf3SJiri Benc 	tun_info = skb_tunnel_info(skb);
201446fa062aSJiri Benc 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
20151b7179d3SThomas Graf 		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
20161b7179d3SThomas Graf 	else
20171b7179d3SThomas Graf 		fl4.flowi4_tun_key.tun_id = 0;
2018f38a9eb1SThomas Graf 	skb_dst_drop(skb);
2019f38a9eb1SThomas Graf 
2020d0daebc3SThomas Graf 	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
20211da177e4SLinus Torvalds 		goto martian_source;
20221da177e4SLinus Torvalds 
20235510cdf7SDavid Ahern 	res->fi = NULL;
20245510cdf7SDavid Ahern 	res->table = NULL;
202527a954bdSAndy Walls 	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
20261da177e4SLinus Torvalds 		goto brd_input;
20271da177e4SLinus Torvalds 
20281da177e4SLinus Torvalds 	/* Accept zero addresses only to limited broadcast;
20291da177e4SLinus Torvalds 	 * I even do not know to fix it or not. Waiting for complains :-)
20301da177e4SLinus Torvalds 	 */
2031f97c1e0cSJoe Perches 	if (ipv4_is_zeronet(saddr))
20321da177e4SLinus Torvalds 		goto martian_source;
20331da177e4SLinus Torvalds 
2034d0daebc3SThomas Graf 	if (ipv4_is_zeronet(daddr))
20351da177e4SLinus Torvalds 		goto martian_destination;
20361da177e4SLinus Torvalds 
20379eb43e76SEric Dumazet 	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
20389eb43e76SEric Dumazet 	 * and call it once if daddr or/and saddr are loopback addresses
20399eb43e76SEric Dumazet 	 */
20409eb43e76SEric Dumazet 	if (ipv4_is_loopback(daddr)) {
20419eb43e76SEric Dumazet 		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
2042d0daebc3SThomas Graf 			goto martian_destination;
20439eb43e76SEric Dumazet 	} else if (ipv4_is_loopback(saddr)) {
20449eb43e76SEric Dumazet 		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
2045d0daebc3SThomas Graf 			goto martian_source;
2046d0daebc3SThomas Graf 	}
2047d0daebc3SThomas Graf 
20481da177e4SLinus Torvalds 	/*
20491da177e4SLinus Torvalds 	 *	Now we are ready to route packet.
20501da177e4SLinus Torvalds 	 */
205168a5e3ddSDavid S. Miller 	fl4.flowi4_oif = 0;
2052e0d56fddSDavid Ahern 	fl4.flowi4_iif = dev->ifindex;
205368a5e3ddSDavid S. Miller 	fl4.flowi4_mark = skb->mark;
205468a5e3ddSDavid S. Miller 	fl4.flowi4_tos = tos;
205568a5e3ddSDavid S. Miller 	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
2056b84f7878SDavid Ahern 	fl4.flowi4_flags = 0;
205768a5e3ddSDavid S. Miller 	fl4.daddr = daddr;
205868a5e3ddSDavid S. Miller 	fl4.saddr = saddr;
20598bcfd092SJulian Anastasov 	fl4.flowi4_uid = sock_net_uid(net, NULL);
2060e37b1e97SRoopa Prabhu 
20615a847a6eSDavid Ahern 	if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
2062e37b1e97SRoopa Prabhu 		flkeys = &_flkeys;
20635a847a6eSDavid Ahern 	} else {
20645a847a6eSDavid Ahern 		fl4.flowi4_proto = 0;
20655a847a6eSDavid Ahern 		fl4.fl4_sport = 0;
20665a847a6eSDavid Ahern 		fl4.fl4_dport = 0;
20675a847a6eSDavid Ahern 	}
2068e37b1e97SRoopa Prabhu 
20695510cdf7SDavid Ahern 	err = fib_lookup(net, &fl4, res, 0);
2070cd0f0b95SDuan Jiong 	if (err != 0) {
2071cd0f0b95SDuan Jiong 		if (!IN_DEV_FORWARD(in_dev))
2072cd0f0b95SDuan Jiong 			err = -EHOSTUNREACH;
20731da177e4SLinus Torvalds 		goto no_route;
2074cd0f0b95SDuan Jiong 	}
20751da177e4SLinus Torvalds 
20765cbf777cSXin Long 	if (res->type == RTN_BROADCAST) {
20775cbf777cSXin Long 		if (IN_DEV_BFORWARD(in_dev))
20785cbf777cSXin Long 			goto make_route;
20790a90478bSXin Long 		/* not do cache if bc_forwarding is enabled */
20800a90478bSXin Long 		if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
20810a90478bSXin Long 			do_cache = false;
20821da177e4SLinus Torvalds 		goto brd_input;
20835cbf777cSXin Long 	}
20841da177e4SLinus Torvalds 
20855510cdf7SDavid Ahern 	if (res->type == RTN_LOCAL) {
20865c04c819SMichael Smith 		err = fib_validate_source(skb, saddr, daddr, tos,
20870d5edc68SCong Wang 					  0, dev, in_dev, &itag);
2088b5f7e755SEric Dumazet 		if (err < 0)
20890d753960SDavid Ahern 			goto martian_source;
20901da177e4SLinus Torvalds 		goto local_input;
20911da177e4SLinus Torvalds 	}
20921da177e4SLinus Torvalds 
2093cd0f0b95SDuan Jiong 	if (!IN_DEV_FORWARD(in_dev)) {
2094cd0f0b95SDuan Jiong 		err = -EHOSTUNREACH;
2095251da413SDavid S. Miller 		goto no_route;
2096cd0f0b95SDuan Jiong 	}
20975510cdf7SDavid Ahern 	if (res->type != RTN_UNICAST)
20981da177e4SLinus Torvalds 		goto martian_destination;
20991da177e4SLinus Torvalds 
21005cbf777cSXin Long make_route:
2101e37b1e97SRoopa Prabhu 	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
21021da177e4SLinus Torvalds out:	return err;
21031da177e4SLinus Torvalds 
21041da177e4SLinus Torvalds brd_input:
21051da177e4SLinus Torvalds 	if (skb->protocol != htons(ETH_P_IP))
21061da177e4SLinus Torvalds 		goto e_inval;
21071da177e4SLinus Torvalds 
210841347dcdSDavid S. Miller 	if (!ipv4_is_zeronet(saddr)) {
21099e56e380SDavid S. Miller 		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
21109e56e380SDavid S. Miller 					  in_dev, &itag);
21111da177e4SLinus Torvalds 		if (err < 0)
21120d753960SDavid Ahern 			goto martian_source;
21131da177e4SLinus Torvalds 	}
21141da177e4SLinus Torvalds 	flags |= RTCF_BROADCAST;
21155510cdf7SDavid Ahern 	res->type = RTN_BROADCAST;
21161da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(in_brd);
21171da177e4SLinus Torvalds 
21181da177e4SLinus Torvalds local_input:
21190a90478bSXin Long 	do_cache &= res->fi && !itag;
21200a90478bSXin Long 	if (do_cache) {
2121eba618abSDavid Ahern 		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
2122eba618abSDavid Ahern 
21230f457a36SDavid Ahern 		rth = rcu_dereference(nhc->nhc_rth_input);
2124d2d68ba9SDavid S. Miller 		if (rt_cache_valid(rth)) {
2125c6cffba4SDavid S. Miller 			skb_dst_set_noref(skb, &rth->dst);
2126c6cffba4SDavid S. Miller 			err = 0;
2127c6cffba4SDavid S. Miller 			goto out;
2128d2d68ba9SDavid S. Miller 		}
2129d2d68ba9SDavid S. Miller 	}
2130d2d68ba9SDavid S. Miller 
2131f5a0aab8SDavid Ahern 	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
21325510cdf7SDavid Ahern 			   flags | RTCF_LOCAL, res->type,
2133d2d68ba9SDavid S. Miller 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
21341da177e4SLinus Torvalds 	if (!rth)
21351da177e4SLinus Torvalds 		goto e_nobufs;
21361da177e4SLinus Torvalds 
2137d8d1f30bSChangli Gao 	rth->dst.output= ip_rt_bug;
2138cf911662SDavid S. Miller #ifdef CONFIG_IP_ROUTE_CLASSID
2139cf911662SDavid S. Miller 	rth->dst.tclassid = itag;
2140cf911662SDavid S. Miller #endif
21419917e1e8SDavid S. Miller 	rth->rt_is_input = 1;
2142571e7226SRoopa Prabhu 
2143a6254864SDuan Jiong 	RT_CACHE_STAT_INC(in_slow_tot);
21445510cdf7SDavid Ahern 	if (res->type == RTN_UNREACHABLE) {
2145d8d1f30bSChangli Gao 		rth->dst.input= ip_error;
2146d8d1f30bSChangli Gao 		rth->dst.error= -err;
21471da177e4SLinus Torvalds 		rth->rt_flags 	&= ~RTCF_LOCAL;
21481da177e4SLinus Torvalds 	}
2149efd85700SThomas Graf 
2150dcdfdf56SAlexei Starovoitov 	if (do_cache) {
2151eba618abSDavid Ahern 		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
2152efd85700SThomas Graf 
2153eba618abSDavid Ahern 		rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
2154efd85700SThomas Graf 		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
2155efd85700SThomas Graf 			WARN_ON(rth->dst.input == lwtunnel_input);
2156efd85700SThomas Graf 			rth->dst.lwtstate->orig_input = rth->dst.input;
2157efd85700SThomas Graf 			rth->dst.input = lwtunnel_input;
2158efd85700SThomas Graf 		}
2159efd85700SThomas Graf 
216087063a1fSDavid Ahern 		if (unlikely(!rt_cache_route(nhc, rth)))
2161dcdfdf56SAlexei Starovoitov 			rt_add_uncached_list(rth);
2162dcdfdf56SAlexei Starovoitov 	}
216389aef892SDavid S. Miller 	skb_dst_set(skb, &rth->dst);
2164b23dd4feSDavid S. Miller 	err = 0;
2165ebc0ffaeSEric Dumazet 	goto out;
21661da177e4SLinus Torvalds 
21671da177e4SLinus Torvalds no_route:
21681da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(in_no_route);
21695510cdf7SDavid Ahern 	res->type = RTN_UNREACHABLE;
21705510cdf7SDavid Ahern 	res->fi = NULL;
21715510cdf7SDavid Ahern 	res->table = NULL;
21721da177e4SLinus Torvalds 	goto local_input;
21731da177e4SLinus Torvalds 
21741da177e4SLinus Torvalds 	/*
21751da177e4SLinus Torvalds 	 *	Do not cache martian addresses: they should be logged (RFC1812)
21761da177e4SLinus Torvalds 	 */
21771da177e4SLinus Torvalds martian_destination:
21781da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(in_martian_dst);
21791da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE
2180e87cc472SJoe Perches 	if (IN_DEV_LOG_MARTIANS(in_dev))
2181e87cc472SJoe Perches 		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
2182673d57e7SHarvey Harrison 				     &daddr, &saddr, dev->name);
21831da177e4SLinus Torvalds #endif
21842c2910a4SDietmar Eggemann 
21851da177e4SLinus Torvalds e_inval:
21861da177e4SLinus Torvalds 	err = -EINVAL;
2187ebc0ffaeSEric Dumazet 	goto out;
21881da177e4SLinus Torvalds 
21891da177e4SLinus Torvalds e_nobufs:
21901da177e4SLinus Torvalds 	err = -ENOBUFS;
2191ebc0ffaeSEric Dumazet 	goto out;
21921da177e4SLinus Torvalds 
21931da177e4SLinus Torvalds martian_source:
21941da177e4SLinus Torvalds 	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2195ebc0ffaeSEric Dumazet 	goto out;
21961da177e4SLinus Torvalds }
21971da177e4SLinus Torvalds 
2198c6cffba4SDavid S. Miller int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
219938a424e4SDavid Miller 			 u8 tos, struct net_device *dev)
22001da177e4SLinus Torvalds {
22015510cdf7SDavid Ahern 	struct fib_result res;
22025510cdf7SDavid Ahern 	int err;
22031da177e4SLinus Torvalds 
22046e28099dSJulian Anastasov 	tos &= IPTOS_RT_MASK;
220596d36220SEric Dumazet 	rcu_read_lock();
22065510cdf7SDavid Ahern 	err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
22075510cdf7SDavid Ahern 	rcu_read_unlock();
220896d36220SEric Dumazet 
22095510cdf7SDavid Ahern 	return err;
22105510cdf7SDavid Ahern }
22115510cdf7SDavid Ahern EXPORT_SYMBOL(ip_route_input_noref);
22125510cdf7SDavid Ahern 
22135510cdf7SDavid Ahern /* called with rcu_read_lock held */
22145510cdf7SDavid Ahern int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
22155510cdf7SDavid Ahern 		       u8 tos, struct net_device *dev, struct fib_result *res)
22165510cdf7SDavid Ahern {
22171da177e4SLinus Torvalds 	/* Multicast recognition logic is moved from route cache to here.
22181da177e4SLinus Torvalds 	   The problem was that too many Ethernet cards have broken/missing
22191da177e4SLinus Torvalds 	   hardware multicast filters :-( As result the host on multicasting
22201da177e4SLinus Torvalds 	   network acquires a lot of useless route cache entries, sort of
22211da177e4SLinus Torvalds 	   SDR messages from all the world. Now we try to get rid of them.
22221da177e4SLinus Torvalds 	   Really, provided software IP multicast filter is organized
22231da177e4SLinus Torvalds 	   reasonably (at least, hashed), it does not result in a slowdown
22241da177e4SLinus Torvalds 	   comparing with route cache reject entries.
22251da177e4SLinus Torvalds 	   Note, that multicast routers are not affected, because
22261da177e4SLinus Torvalds 	   route cache entry is created eventually.
22271da177e4SLinus Torvalds 	 */
2228f97c1e0cSJoe Perches 	if (ipv4_is_multicast(daddr)) {
222996d36220SEric Dumazet 		struct in_device *in_dev = __in_dev_get_rcu(dev);
2230e58e4159SDavid Ahern 		int our = 0;
22315510cdf7SDavid Ahern 		int err = -EINVAL;
22321da177e4SLinus Torvalds 
223322c74764SPaolo Abeni 		if (!in_dev)
223422c74764SPaolo Abeni 			return err;
2235e58e4159SDavid Ahern 		our = ip_check_mc_rcu(in_dev, daddr, saddr,
2236eddc9ec5SArnaldo Carvalho de Melo 				      ip_hdr(skb)->protocol);
2237e58e4159SDavid Ahern 
2238e58e4159SDavid Ahern 		/* check l3 master if no match yet */
223922c74764SPaolo Abeni 		if (!our && netif_is_l3_slave(dev)) {
2240e58e4159SDavid Ahern 			struct in_device *l3_in_dev;
2241e58e4159SDavid Ahern 
2242e58e4159SDavid Ahern 			l3_in_dev = __in_dev_get_rcu(skb->dev);
2243e58e4159SDavid Ahern 			if (l3_in_dev)
2244e58e4159SDavid Ahern 				our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
2245e58e4159SDavid Ahern 						      ip_hdr(skb)->protocol);
2246e58e4159SDavid Ahern 		}
2247e58e4159SDavid Ahern 
22481da177e4SLinus Torvalds 		if (our
22491da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE
22509d4fb27dSJoe Perches 			||
22519d4fb27dSJoe Perches 		    (!ipv4_is_local_multicast(daddr) &&
2252f97c1e0cSJoe Perches 		     IN_DEV_MFORWARD(in_dev))
22531da177e4SLinus Torvalds #endif
22541da177e4SLinus Torvalds 		   ) {
22555510cdf7SDavid Ahern 			err = ip_route_input_mc(skb, daddr, saddr,
22561da177e4SLinus Torvalds 						tos, dev, our);
2257e58e4159SDavid Ahern 		}
22585510cdf7SDavid Ahern 		return err;
22591da177e4SLinus Torvalds 	}
22605510cdf7SDavid Ahern 
22615510cdf7SDavid Ahern 	return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
22621da177e4SLinus Torvalds }
22631da177e4SLinus Torvalds 
2264ebc0ffaeSEric Dumazet /* called with rcu_read_lock() */
2265982721f3SDavid S. Miller static struct rtable *__mkroute_output(const struct fib_result *res,
22661a00fee4SDavid Miller 				       const struct flowi4 *fl4, int orig_oif,
2267f61759e6SJulian Anastasov 				       struct net_device *dev_out,
22685ada5527SDavid S. Miller 				       unsigned int flags)
22691da177e4SLinus Torvalds {
2270982721f3SDavid S. Miller 	struct fib_info *fi = res->fi;
2271f2bb4bedSDavid S. Miller 	struct fib_nh_exception *fnhe;
22725ada5527SDavid S. Miller 	struct in_device *in_dev;
2273982721f3SDavid S. Miller 	u16 type = res->type;
22745ada5527SDavid S. Miller 	struct rtable *rth;
2275c92b9655SJulian Anastasov 	bool do_cache;
22761da177e4SLinus Torvalds 
2277d0daebc3SThomas Graf 	in_dev = __in_dev_get_rcu(dev_out);
2278d0daebc3SThomas Graf 	if (!in_dev)
2279d0daebc3SThomas Graf 		return ERR_PTR(-EINVAL);
2280d0daebc3SThomas Graf 
2281d0daebc3SThomas Graf 	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
22825f02ce24SDavid Ahern 		if (ipv4_is_loopback(fl4->saddr) &&
22835f02ce24SDavid Ahern 		    !(dev_out->flags & IFF_LOOPBACK) &&
22845f02ce24SDavid Ahern 		    !netif_is_l3_master(dev_out))
22855ada5527SDavid S. Miller 			return ERR_PTR(-EINVAL);
22861da177e4SLinus Torvalds 
228768a5e3ddSDavid S. Miller 	if (ipv4_is_lbcast(fl4->daddr))
2288982721f3SDavid S. Miller 		type = RTN_BROADCAST;
228968a5e3ddSDavid S. Miller 	else if (ipv4_is_multicast(fl4->daddr))
2290982721f3SDavid S. Miller 		type = RTN_MULTICAST;
229168a5e3ddSDavid S. Miller 	else if (ipv4_is_zeronet(fl4->daddr))
22925ada5527SDavid S. Miller 		return ERR_PTR(-EINVAL);
22931da177e4SLinus Torvalds 
22941da177e4SLinus Torvalds 	if (dev_out->flags & IFF_LOOPBACK)
22951da177e4SLinus Torvalds 		flags |= RTCF_LOCAL;
22961da177e4SLinus Torvalds 
229763617421SJulian Anastasov 	do_cache = true;
2298982721f3SDavid S. Miller 	if (type == RTN_BROADCAST) {
22991da177e4SLinus Torvalds 		flags |= RTCF_BROADCAST | RTCF_LOCAL;
2300982721f3SDavid S. Miller 		fi = NULL;
2301982721f3SDavid S. Miller 	} else if (type == RTN_MULTICAST) {
23021da177e4SLinus Torvalds 		flags |= RTCF_MULTICAST | RTCF_LOCAL;
2303813b3b5dSDavid S. Miller 		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2304813b3b5dSDavid S. Miller 				     fl4->flowi4_proto))
23051da177e4SLinus Torvalds 			flags &= ~RTCF_LOCAL;
230663617421SJulian Anastasov 		else
230763617421SJulian Anastasov 			do_cache = false;
23081da177e4SLinus Torvalds 		/* If multicast route do not exist use
2309dd28d1a0SEric Dumazet 		 * default one, but do not gateway in this case.
2310dd28d1a0SEric Dumazet 		 * Yes, it is hack.
23111da177e4SLinus Torvalds 		 */
2312982721f3SDavid S. Miller 		if (fi && res->prefixlen < 4)
2313982721f3SDavid S. Miller 			fi = NULL;
2314d6d5e999SChris Friesen 	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2315d6d5e999SChris Friesen 		   (orig_oif != dev_out->ifindex)) {
2316d6d5e999SChris Friesen 		/* For local routes that require a particular output interface
2317d6d5e999SChris Friesen 		 * we do not want to cache the result.  Caching the result
2318d6d5e999SChris Friesen 		 * causes incorrect behaviour when there are multiple source
2319d6d5e999SChris Friesen 		 * addresses on the interface, the end result being that if the
2320d6d5e999SChris Friesen 		 * intended recipient is waiting on that interface for the
2321d6d5e999SChris Friesen 		 * packet he won't receive it because it will be delivered on
2322d6d5e999SChris Friesen 		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
2323d6d5e999SChris Friesen 		 * be set to the loopback interface as well.
2324d6d5e999SChris Friesen 		 */
232594720e3aSJulian Anastasov 		do_cache = false;
23261da177e4SLinus Torvalds 	}
23271da177e4SLinus Torvalds 
2328f2bb4bedSDavid S. Miller 	fnhe = NULL;
232963617421SJulian Anastasov 	do_cache &= fi != NULL;
233094720e3aSJulian Anastasov 	if (fi) {
2331eba618abSDavid Ahern 		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
2332d26b3a7cSEric Dumazet 		struct rtable __rcu **prth;
2333d26b3a7cSEric Dumazet 
2334a5995e71SDavid Ahern 		fnhe = find_exception(nhc, fl4->daddr);
233594720e3aSJulian Anastasov 		if (!do_cache)
233694720e3aSJulian Anastasov 			goto add;
2337deed49dfSXin Long 		if (fnhe) {
23382ffae99dSTimo Teräs 			prth = &fnhe->fnhe_rth_output;
2339deed49dfSXin Long 		} else {
2340c92b9655SJulian Anastasov 			if (unlikely(fl4->flowi4_flags &
2341c92b9655SJulian Anastasov 				     FLOWI_FLAG_KNOWN_NH &&
2342bdf00467SDavid Ahern 				     !(nhc->nhc_gw_family &&
2343eba618abSDavid Ahern 				       nhc->nhc_scope == RT_SCOPE_LINK))) {
2344c92b9655SJulian Anastasov 				do_cache = false;
2345c92b9655SJulian Anastasov 				goto add;
2346c92b9655SJulian Anastasov 			}
23470f457a36SDavid Ahern 			prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
234894720e3aSJulian Anastasov 		}
2349d26b3a7cSEric Dumazet 		rth = rcu_dereference(*prth);
23509df16efaSWei Wang 		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
2351f2bb4bedSDavid S. Miller 			return rth;
2352f2bb4bedSDavid S. Miller 	}
2353c92b9655SJulian Anastasov 
2354c92b9655SJulian Anastasov add:
2355d08c4f35SDavid Ahern 	rth = rt_dst_alloc(dev_out, flags, type,
23565c1e6aa3SDavid S. Miller 			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
2357f2bb4bedSDavid S. Miller 			   IN_DEV_CONF_GET(in_dev, NOXFRM),
2358c92b9655SJulian Anastasov 			   do_cache);
23598391d07bSDimitris Michailidis 	if (!rth)
23605ada5527SDavid S. Miller 		return ERR_PTR(-ENOBUFS);
23618391d07bSDimitris Michailidis 
23629438c871SDavid Ahern 	rth->rt_iif = orig_oif;
2363b7503e0cSDavid Ahern 
23641da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(out_slow_tot);
23651da177e4SLinus Torvalds 
23661da177e4SLinus Torvalds 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
23671da177e4SLinus Torvalds 		if (flags & RTCF_LOCAL &&
23681da177e4SLinus Torvalds 		    !(dev_out->flags & IFF_LOOPBACK)) {
2369d8d1f30bSChangli Gao 			rth->dst.output = ip_mc_output;
23701da177e4SLinus Torvalds 			RT_CACHE_STAT_INC(out_slow_mc);
23711da177e4SLinus Torvalds 		}
23721da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE
2373982721f3SDavid S. Miller 		if (type == RTN_MULTICAST) {
23741da177e4SLinus Torvalds 			if (IN_DEV_MFORWARD(in_dev) &&
2375813b3b5dSDavid S. Miller 			    !ipv4_is_local_multicast(fl4->daddr)) {
2376d8d1f30bSChangli Gao 				rth->dst.input = ip_mr_input;
2377d8d1f30bSChangli Gao 				rth->dst.output = ip_mc_output;
23781da177e4SLinus Torvalds 			}
23791da177e4SLinus Torvalds 		}
23801da177e4SLinus Torvalds #endif
23811da177e4SLinus Torvalds 	}
23821da177e4SLinus Torvalds 
2383a4c2fd7fSWei Wang 	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
23849942895bSDavid Ahern 	lwtunnel_set_redirect(&rth->dst);
23851da177e4SLinus Torvalds 
23865ada5527SDavid S. Miller 	return rth;
23871da177e4SLinus Torvalds }
23881da177e4SLinus Torvalds 
23891da177e4SLinus Torvalds /*
23901da177e4SLinus Torvalds  * Major route resolver routine.
23911da177e4SLinus Torvalds  */
23921da177e4SLinus Torvalds 
23933abd1adeSDavid Ahern struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2394bf4e0a3dSNikolay Aleksandrov 					const struct sk_buff *skb)
23951da177e4SLinus Torvalds {
2396f61759e6SJulian Anastasov 	__u8 tos = RT_FL_TOS(fl4);
2397d0ea2b12SEric Dumazet 	struct fib_result res = {
2398d0ea2b12SEric Dumazet 		.type		= RTN_UNSPEC,
2399d0ea2b12SEric Dumazet 		.fi		= NULL,
2400d0ea2b12SEric Dumazet 		.table		= NULL,
2401d0ea2b12SEric Dumazet 		.tclassid	= 0,
2402d0ea2b12SEric Dumazet 	};
24035ada5527SDavid S. Miller 	struct rtable *rth;
24041da177e4SLinus Torvalds 
24051fb9489bSPavel Emelyanov 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
2406813b3b5dSDavid S. Miller 	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2407813b3b5dSDavid S. Miller 	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
240844713b67SDavid S. Miller 			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
240944713b67SDavid S. Miller 
2410010c2708SDavid S. Miller 	rcu_read_lock();
24113abd1adeSDavid Ahern 	rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
24123abd1adeSDavid Ahern 	rcu_read_unlock();
24133abd1adeSDavid Ahern 
24143abd1adeSDavid Ahern 	return rth;
24153abd1adeSDavid Ahern }
24163abd1adeSDavid Ahern EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
24173abd1adeSDavid Ahern 
24183abd1adeSDavid Ahern struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
24193abd1adeSDavid Ahern 					    struct fib_result *res,
24203abd1adeSDavid Ahern 					    const struct sk_buff *skb)
24213abd1adeSDavid Ahern {
24223abd1adeSDavid Ahern 	struct net_device *dev_out = NULL;
24233abd1adeSDavid Ahern 	int orig_oif = fl4->flowi4_oif;
24243abd1adeSDavid Ahern 	unsigned int flags = 0;
24253abd1adeSDavid Ahern 	struct rtable *rth;
24263abd1adeSDavid Ahern 	int err = -ENETUNREACH;
24273abd1adeSDavid Ahern 
2428813b3b5dSDavid S. Miller 	if (fl4->saddr) {
2429b23dd4feSDavid S. Miller 		rth = ERR_PTR(-EINVAL);
2430813b3b5dSDavid S. Miller 		if (ipv4_is_multicast(fl4->saddr) ||
2431813b3b5dSDavid S. Miller 		    ipv4_is_lbcast(fl4->saddr) ||
2432813b3b5dSDavid S. Miller 		    ipv4_is_zeronet(fl4->saddr))
24331da177e4SLinus Torvalds 			goto out;
24341da177e4SLinus Torvalds 
24351da177e4SLinus Torvalds 		/* I removed check for oif == dev_out->oif here.
24361da177e4SLinus Torvalds 		   It was wrong for two reasons:
24371ab35276SDenis V. Lunev 		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
24381ab35276SDenis V. Lunev 		      is assigned to multiple interfaces.
24391da177e4SLinus Torvalds 		   2. Moreover, we are allowed to send packets with saddr
24401da177e4SLinus Torvalds 		      of another iface. --ANK
24411da177e4SLinus Torvalds 		 */
24421da177e4SLinus Torvalds 
2443813b3b5dSDavid S. Miller 		if (fl4->flowi4_oif == 0 &&
2444813b3b5dSDavid S. Miller 		    (ipv4_is_multicast(fl4->daddr) ||
2445813b3b5dSDavid S. Miller 		     ipv4_is_lbcast(fl4->daddr))) {
2446a210d01aSJulian Anastasov 			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2447813b3b5dSDavid S. Miller 			dev_out = __ip_dev_find(net, fl4->saddr, false);
244851456b29SIan Morris 			if (!dev_out)
2449a210d01aSJulian Anastasov 				goto out;
2450a210d01aSJulian Anastasov 
24511da177e4SLinus Torvalds 			/* Special hack: user can direct multicasts
24521da177e4SLinus Torvalds 			   and limited broadcast via necessary interface
24531da177e4SLinus Torvalds 			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
24541da177e4SLinus Torvalds 			   This hack is not just for fun, it allows
24551da177e4SLinus Torvalds 			   vic,vat and friends to work.
24561da177e4SLinus Torvalds 			   They bind socket to loopback, set ttl to zero
24571da177e4SLinus Torvalds 			   and expect that it will work.
24581da177e4SLinus Torvalds 			   From the viewpoint of routing cache they are broken,
24591da177e4SLinus Torvalds 			   because we are not allowed to build multicast path
24601da177e4SLinus Torvalds 			   with loopback source addr (look, routing cache
24611da177e4SLinus Torvalds 			   cannot know, that ttl is zero, so that packet
24621da177e4SLinus Torvalds 			   will not leave this host and route is valid).
24631da177e4SLinus Torvalds 			   Luckily, this hack is good workaround.
24641da177e4SLinus Torvalds 			 */
24651da177e4SLinus Torvalds 
2466813b3b5dSDavid S. Miller 			fl4->flowi4_oif = dev_out->ifindex;
24671da177e4SLinus Torvalds 			goto make_route;
24681da177e4SLinus Torvalds 		}
2469a210d01aSJulian Anastasov 
2470813b3b5dSDavid S. Miller 		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
2471a210d01aSJulian Anastasov 			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2472813b3b5dSDavid S. Miller 			if (!__ip_dev_find(net, fl4->saddr, false))
2473a210d01aSJulian Anastasov 				goto out;
24741da177e4SLinus Torvalds 		}
2475a210d01aSJulian Anastasov 	}
24761da177e4SLinus Torvalds 
24771da177e4SLinus Torvalds 
2478813b3b5dSDavid S. Miller 	if (fl4->flowi4_oif) {
2479813b3b5dSDavid S. Miller 		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
2480b23dd4feSDavid S. Miller 		rth = ERR_PTR(-ENODEV);
248151456b29SIan Morris 		if (!dev_out)
24821da177e4SLinus Torvalds 			goto out;
2483e5ed6399SHerbert Xu 
2484e5ed6399SHerbert Xu 		/* RACE: Check return value of inet_select_addr instead. */
2485fc75fc83SEric Dumazet 		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
2486b23dd4feSDavid S. Miller 			rth = ERR_PTR(-ENETUNREACH);
2487fc75fc83SEric Dumazet 			goto out;
2488fc75fc83SEric Dumazet 		}
2489813b3b5dSDavid S. Miller 		if (ipv4_is_local_multicast(fl4->daddr) ||
24906a211654SAndrew Lunn 		    ipv4_is_lbcast(fl4->daddr) ||
24916a211654SAndrew Lunn 		    fl4->flowi4_proto == IPPROTO_IGMP) {
2492813b3b5dSDavid S. Miller 			if (!fl4->saddr)
2493813b3b5dSDavid S. Miller 				fl4->saddr = inet_select_addr(dev_out, 0,
24941da177e4SLinus Torvalds 							      RT_SCOPE_LINK);
24951da177e4SLinus Torvalds 			goto make_route;
24961da177e4SLinus Torvalds 		}
24970a7e2260SJiri Benc 		if (!fl4->saddr) {
2498813b3b5dSDavid S. Miller 			if (ipv4_is_multicast(fl4->daddr))
2499813b3b5dSDavid S. Miller 				fl4->saddr = inet_select_addr(dev_out, 0,
2500813b3b5dSDavid S. Miller 							      fl4->flowi4_scope);
2501813b3b5dSDavid S. Miller 			else if (!fl4->daddr)
2502813b3b5dSDavid S. Miller 				fl4->saddr = inet_select_addr(dev_out, 0,
25031da177e4SLinus Torvalds 							      RT_SCOPE_HOST);
25041da177e4SLinus Torvalds 		}
2505613d09b3SDavid Ahern 	}
25061da177e4SLinus Torvalds 
2507813b3b5dSDavid S. Miller 	if (!fl4->daddr) {
2508813b3b5dSDavid S. Miller 		fl4->daddr = fl4->saddr;
2509813b3b5dSDavid S. Miller 		if (!fl4->daddr)
2510813b3b5dSDavid S. Miller 			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
2511b40afd0eSDenis V. Lunev 		dev_out = net->loopback_dev;
25121fb9489bSPavel Emelyanov 		fl4->flowi4_oif = LOOPBACK_IFINDEX;
25133abd1adeSDavid Ahern 		res->type = RTN_LOCAL;
25141da177e4SLinus Torvalds 		flags |= RTCF_LOCAL;
25151da177e4SLinus Torvalds 		goto make_route;
25161da177e4SLinus Torvalds 	}
25171da177e4SLinus Torvalds 
25183abd1adeSDavid Ahern 	err = fib_lookup(net, fl4, res, 0);
25190315e382SNikola Forró 	if (err) {
25203abd1adeSDavid Ahern 		res->fi = NULL;
25213abd1adeSDavid Ahern 		res->table = NULL;
25226104e112SDavid Ahern 		if (fl4->flowi4_oif &&
2523e58e4159SDavid Ahern 		    (ipv4_is_multicast(fl4->daddr) ||
2524e58e4159SDavid Ahern 		    !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
25251da177e4SLinus Torvalds 			/* Apparently, routing tables are wrong. Assume,
25261da177e4SLinus Torvalds 			   that the destination is on link.
25271da177e4SLinus Torvalds 
25281da177e4SLinus Torvalds 			   WHY? DW.
25291da177e4SLinus Torvalds 			   Because we are allowed to send to iface
25301da177e4SLinus Torvalds 			   even if it has NO routes and NO assigned
25311da177e4SLinus Torvalds 			   addresses. When oif is specified, routing
25321da177e4SLinus Torvalds 			   tables are looked up with only one purpose:
25331da177e4SLinus Torvalds 			   to catch if destination is gatewayed, rather than
25341da177e4SLinus Torvalds 			   direct. Moreover, if MSG_DONTROUTE is set,
25351da177e4SLinus Torvalds 			   we send packet, ignoring both routing tables
25361da177e4SLinus Torvalds 			   and ifaddr state. --ANK
25371da177e4SLinus Torvalds 
25381da177e4SLinus Torvalds 
25391da177e4SLinus Torvalds 			   We could make it even if oif is unknown,
25401da177e4SLinus Torvalds 			   likely IPv6, but we do not.
25411da177e4SLinus Torvalds 			 */
25421da177e4SLinus Torvalds 
2543813b3b5dSDavid S. Miller 			if (fl4->saddr == 0)
2544813b3b5dSDavid S. Miller 				fl4->saddr = inet_select_addr(dev_out, 0,
25451da177e4SLinus Torvalds 							      RT_SCOPE_LINK);
25463abd1adeSDavid Ahern 			res->type = RTN_UNICAST;
25471da177e4SLinus Torvalds 			goto make_route;
25481da177e4SLinus Torvalds 		}
25490315e382SNikola Forró 		rth = ERR_PTR(err);
25501da177e4SLinus Torvalds 		goto out;
25511da177e4SLinus Torvalds 	}
25521da177e4SLinus Torvalds 
25533abd1adeSDavid Ahern 	if (res->type == RTN_LOCAL) {
2554813b3b5dSDavid S. Miller 		if (!fl4->saddr) {
25553abd1adeSDavid Ahern 			if (res->fi->fib_prefsrc)
25563abd1adeSDavid Ahern 				fl4->saddr = res->fi->fib_prefsrc;
25579fc3bbb4SJoel Sing 			else
2558813b3b5dSDavid S. Miller 				fl4->saddr = fl4->daddr;
25599fc3bbb4SJoel Sing 		}
25605f02ce24SDavid Ahern 
25615f02ce24SDavid Ahern 		/* L3 master device is the loopback for that domain */
25623abd1adeSDavid Ahern 		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
2563b7c8487cSRobert Shearman 			net->loopback_dev;
2564839da4d9SDavid Ahern 
2565839da4d9SDavid Ahern 		/* make sure orig_oif points to fib result device even
2566839da4d9SDavid Ahern 		 * though packet rx/tx happens over loopback or l3mdev
2567839da4d9SDavid Ahern 		 */
2568839da4d9SDavid Ahern 		orig_oif = FIB_RES_OIF(*res);
2569839da4d9SDavid Ahern 
2570813b3b5dSDavid S. Miller 		fl4->flowi4_oif = dev_out->ifindex;
25711da177e4SLinus Torvalds 		flags |= RTCF_LOCAL;
25721da177e4SLinus Torvalds 		goto make_route;
25731da177e4SLinus Torvalds 	}
25741da177e4SLinus Torvalds 
25753abd1adeSDavid Ahern 	fib_select_path(net, res, fl4, skb);
25761da177e4SLinus Torvalds 
25773abd1adeSDavid Ahern 	dev_out = FIB_RES_DEV(*res);
2578813b3b5dSDavid S. Miller 	fl4->flowi4_oif = dev_out->ifindex;
25791da177e4SLinus Torvalds 
25801da177e4SLinus Torvalds 
25811da177e4SLinus Torvalds make_route:
25823abd1adeSDavid Ahern 	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
25831da177e4SLinus Torvalds 
2584010c2708SDavid S. Miller out:
2585b23dd4feSDavid S. Miller 	return rth;
25861da177e4SLinus Torvalds }
2587d8c97a94SArnaldo Carvalho de Melo 
2588ae2688d5SJianzhao Wang static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2589ae2688d5SJianzhao Wang {
2590ae2688d5SJianzhao Wang 	return NULL;
2591ae2688d5SJianzhao Wang }
2592ae2688d5SJianzhao Wang 
2593ebb762f2SSteffen Klassert static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
2594ec831ea7SRoland Dreier {
2595618f9bc7SSteffen Klassert 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2596618f9bc7SSteffen Klassert 
2597618f9bc7SSteffen Klassert 	return mtu ? : dst->dev->mtu;
2598ec831ea7SRoland Dreier }
2599ec831ea7SRoland Dreier 
26006700c270SDavid S. Miller static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
26016700c270SDavid S. Miller 					  struct sk_buff *skb, u32 mtu)
260214e50e57SDavid S. Miller {
260314e50e57SDavid S. Miller }
260414e50e57SDavid S. Miller 
/* dst_ops.redirect hook of the IPv4 blackhole ops table: a no-op. */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst,
				       struct sock *sk,
				       struct sk_buff *skb)
{
	/* intentionally empty */
}
2609b587ee3bSDavid S. Miller 
26100972ddb2SHeld Bernhard static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
26110972ddb2SHeld Bernhard 					  unsigned long old)
26120972ddb2SHeld Bernhard {
26130972ddb2SHeld Bernhard 	return NULL;
26140972ddb2SHeld Bernhard }
26150972ddb2SHeld Bernhard 
261614e50e57SDavid S. Miller static struct dst_ops ipv4_dst_blackhole_ops = {
261714e50e57SDavid S. Miller 	.family			=	AF_INET,
2618ae2688d5SJianzhao Wang 	.check			=	ipv4_blackhole_dst_check,
2619ebb762f2SSteffen Klassert 	.mtu			=	ipv4_blackhole_mtu,
2620214f45c9SEric Dumazet 	.default_advmss		=	ipv4_default_advmss,
262114e50e57SDavid S. Miller 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
2622b587ee3bSDavid S. Miller 	.redirect		=	ipv4_rt_blackhole_redirect,
26230972ddb2SHeld Bernhard 	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
2624d3aaeb38SDavid S. Miller 	.neigh_lookup		=	ipv4_neigh_lookup,
262514e50e57SDavid S. Miller };
262614e50e57SDavid S. Miller 
26272774c131SDavid S. Miller struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
262814e50e57SDavid S. Miller {
26292774c131SDavid S. Miller 	struct rtable *ort = (struct rtable *) dst_orig;
2630f5b0a874SDavid S. Miller 	struct rtable *rt;
263114e50e57SDavid S. Miller 
26326c0e7284SSteffen Klassert 	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
263314e50e57SDavid S. Miller 	if (rt) {
2634d8d1f30bSChangli Gao 		struct dst_entry *new = &rt->dst;
263514e50e57SDavid S. Miller 
263614e50e57SDavid S. Miller 		new->__use = 1;
2637352e512cSHerbert Xu 		new->input = dst_discard;
2638ede2059dSEric W. Biederman 		new->output = dst_discard_out;
263914e50e57SDavid S. Miller 
26401dbe3252SWei Wang 		new->dev = net->loopback_dev;
264114e50e57SDavid S. Miller 		if (new->dev)
264214e50e57SDavid S. Miller 			dev_hold(new->dev);
264314e50e57SDavid S. Miller 
26449917e1e8SDavid S. Miller 		rt->rt_is_input = ort->rt_is_input;
26455e2b61f7SDavid S. Miller 		rt->rt_iif = ort->rt_iif;
26465943634fSDavid S. Miller 		rt->rt_pmtu = ort->rt_pmtu;
2647d52e5a7eSSabrina Dubroca 		rt->rt_mtu_locked = ort->rt_mtu_locked;
264814e50e57SDavid S. Miller 
2649ca4c3fc2Sfan.du 		rt->rt_genid = rt_genid_ipv4(net);
265014e50e57SDavid S. Miller 		rt->rt_flags = ort->rt_flags;
265114e50e57SDavid S. Miller 		rt->rt_type = ort->rt_type;
26521550c171SDavid Ahern 		rt->rt_gw_family = ort->rt_gw_family;
26531550c171SDavid Ahern 		if (rt->rt_gw_family == AF_INET)
26541550c171SDavid Ahern 			rt->rt_gw4 = ort->rt_gw4;
26550f5f7d7bSDavid Ahern 		else if (rt->rt_gw_family == AF_INET6)
26560f5f7d7bSDavid Ahern 			rt->rt_gw6 = ort->rt_gw6;
265714e50e57SDavid S. Miller 
2658caacf05eSDavid S. Miller 		INIT_LIST_HEAD(&rt->rt_uncached);
265914e50e57SDavid S. Miller 	}
266014e50e57SDavid S. Miller 
26612774c131SDavid S. Miller 	dst_release(dst_orig);
26622774c131SDavid S. Miller 
26632774c131SDavid S. Miller 	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
266414e50e57SDavid S. Miller }
266514e50e57SDavid S. Miller 
26669d6ec938SDavid S. Miller struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
26676f9c9615SEric Dumazet 				    const struct sock *sk)
26681da177e4SLinus Torvalds {
26699d6ec938SDavid S. Miller 	struct rtable *rt = __ip_route_output_key(net, flp4);
26701da177e4SLinus Torvalds 
2671b23dd4feSDavid S. Miller 	if (IS_ERR(rt))
2672b23dd4feSDavid S. Miller 		return rt;
26731da177e4SLinus Torvalds 
267456157872SDavid S. Miller 	if (flp4->flowi4_proto)
2675f92ee619SSteffen Klassert 		rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
26769d6ec938SDavid S. Miller 							flowi4_to_flowi(flp4),
26779d6ec938SDavid S. Miller 							sk, 0);
26781da177e4SLinus Torvalds 
2679b23dd4feSDavid S. Miller 	return rt;
26801da177e4SLinus Torvalds }
2681d8c97a94SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(ip_route_output_flow);
2682d8c97a94SArnaldo Carvalho de Melo 
26833765d35eSDavid Ahern /* called with rcu_read_lock held */
2684404eb77eSRoopa Prabhu static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2685404eb77eSRoopa Prabhu 			struct rtable *rt, u32 table_id, struct flowi4 *fl4,
2686404eb77eSRoopa Prabhu 			struct sk_buff *skb, u32 portid, u32 seq)
26871da177e4SLinus Torvalds {
26881da177e4SLinus Torvalds 	struct rtmsg *r;
26891da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
26902bc8ca40SSteffen Klassert 	unsigned long expires = 0;
2691f185071dSDavid S. Miller 	u32 error;
2692521f5490SJulian Anastasov 	u32 metrics[RTAX_MAX];
2693be403ea1SThomas Graf 
2694d3166e0cSDavid Ahern 	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
269551456b29SIan Morris 	if (!nlh)
269626932566SPatrick McHardy 		return -EMSGSIZE;
2697be403ea1SThomas Graf 
2698be403ea1SThomas Graf 	r = nlmsg_data(nlh);
26991da177e4SLinus Torvalds 	r->rtm_family	 = AF_INET;
27001da177e4SLinus Torvalds 	r->rtm_dst_len	= 32;
27011da177e4SLinus Torvalds 	r->rtm_src_len	= 0;
2702*d948974cSStefano Brivio 	r->rtm_tos	= fl4 ? fl4->flowi4_tos : 0;
27038a430ed5SDavid Ahern 	r->rtm_table	= table_id < 256 ? table_id : RT_TABLE_COMPAT;
2704c36ba660SDavid Ahern 	if (nla_put_u32(skb, RTA_TABLE, table_id))
2705f3756b79SDavid S. Miller 		goto nla_put_failure;
27061da177e4SLinus Torvalds 	r->rtm_type	= rt->rt_type;
27071da177e4SLinus Torvalds 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
27081da177e4SLinus Torvalds 	r->rtm_protocol = RTPROT_UNSPEC;
27091da177e4SLinus Torvalds 	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
27101da177e4SLinus Torvalds 	if (rt->rt_flags & RTCF_NOTIFY)
27111da177e4SLinus Torvalds 		r->rtm_flags |= RTM_F_NOTIFY;
2712df4d9254SHannes Frederic Sowa 	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2713df4d9254SHannes Frederic Sowa 		r->rtm_flags |= RTCF_DOREDIRECT;
2714be403ea1SThomas Graf 
2715930345eaSJiri Benc 	if (nla_put_in_addr(skb, RTA_DST, dst))
2716f3756b79SDavid S. Miller 		goto nla_put_failure;
27171a00fee4SDavid Miller 	if (src) {
27181da177e4SLinus Torvalds 		r->rtm_src_len = 32;
2719930345eaSJiri Benc 		if (nla_put_in_addr(skb, RTA_SRC, src))
2720f3756b79SDavid S. Miller 			goto nla_put_failure;
27211da177e4SLinus Torvalds 	}
2722f3756b79SDavid S. Miller 	if (rt->dst.dev &&
2723f3756b79SDavid S. Miller 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2724f3756b79SDavid S. Miller 		goto nla_put_failure;
2725c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
2726f3756b79SDavid S. Miller 	if (rt->dst.tclassid &&
2727f3756b79SDavid S. Miller 	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2728f3756b79SDavid S. Miller 		goto nla_put_failure;
27291da177e4SLinus Torvalds #endif
2730*d948974cSStefano Brivio 	if (fl4 && !rt_is_input_route(rt) &&
2731d6c0a4f6SDavid Miller 	    fl4->saddr != src) {
2732930345eaSJiri Benc 		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
2733f3756b79SDavid S. Miller 			goto nla_put_failure;
2734f3756b79SDavid S. Miller 	}
27351550c171SDavid Ahern 	if (rt->rt_gw_family == AF_INET &&
27360f5f7d7bSDavid Ahern 	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
2737f3756b79SDavid S. Miller 		goto nla_put_failure;
27380f5f7d7bSDavid Ahern 	} else if (rt->rt_gw_family == AF_INET6) {
27390f5f7d7bSDavid Ahern 		int alen = sizeof(struct in6_addr);
27400f5f7d7bSDavid Ahern 		struct nlattr *nla;
27410f5f7d7bSDavid Ahern 		struct rtvia *via;
27420f5f7d7bSDavid Ahern 
27430f5f7d7bSDavid Ahern 		nla = nla_reserve(skb, RTA_VIA, alen + 2);
27440f5f7d7bSDavid Ahern 		if (!nla)
27450f5f7d7bSDavid Ahern 			goto nla_put_failure;
27460f5f7d7bSDavid Ahern 
27470f5f7d7bSDavid Ahern 		via = nla_data(nla);
27480f5f7d7bSDavid Ahern 		via->rtvia_family = AF_INET6;
27490f5f7d7bSDavid Ahern 		memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
27500f5f7d7bSDavid Ahern 	}
2751be403ea1SThomas Graf 
2752ee9a8f7aSSteffen Klassert 	expires = rt->dst.expires;
2753ee9a8f7aSSteffen Klassert 	if (expires) {
2754ee9a8f7aSSteffen Klassert 		unsigned long now = jiffies;
2755ee9a8f7aSSteffen Klassert 
2756ee9a8f7aSSteffen Klassert 		if (time_before(now, expires))
2757ee9a8f7aSSteffen Klassert 			expires -= now;
2758ee9a8f7aSSteffen Klassert 		else
2759ee9a8f7aSSteffen Klassert 			expires = 0;
2760ee9a8f7aSSteffen Klassert 	}
2761ee9a8f7aSSteffen Klassert 
2762521f5490SJulian Anastasov 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2763ee9a8f7aSSteffen Klassert 	if (rt->rt_pmtu && expires)
2764521f5490SJulian Anastasov 		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2765d52e5a7eSSabrina Dubroca 	if (rt->rt_mtu_locked && expires)
2766d52e5a7eSSabrina Dubroca 		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
2767521f5490SJulian Anastasov 	if (rtnetlink_put_metrics(skb, metrics) < 0)
2768be403ea1SThomas Graf 		goto nla_put_failure;
2769be403ea1SThomas Graf 
2770*d948974cSStefano Brivio 	if (fl4) {
2771b4869889SDavid Miller 		if (fl4->flowi4_mark &&
277268aaed54Sstephen hemminger 		    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
2773f3756b79SDavid S. Miller 			goto nla_put_failure;
2774963bfeeeSEric Dumazet 
2775622ec2c9SLorenzo Colitti 		if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2776622ec2c9SLorenzo Colitti 		    nla_put_u32(skb, RTA_UID,
2777*d948974cSStefano Brivio 				from_kuid_munged(current_user_ns(),
2778*d948974cSStefano Brivio 						 fl4->flowi4_uid)))
2779622ec2c9SLorenzo Colitti 			goto nla_put_failure;
2780622ec2c9SLorenzo Colitti 
2781c7537967SDavid S. Miller 		if (rt_is_input_route(rt)) {
27828caaf7b6SNicolas Dichtel #ifdef CONFIG_IP_MROUTE
2783*d948974cSStefano Brivio 			if (ipv4_is_multicast(dst) &&
2784*d948974cSStefano Brivio 			    !ipv4_is_local_multicast(dst) &&
27858caaf7b6SNicolas Dichtel 			    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
27868caaf7b6SNicolas Dichtel 				int err = ipmr_get_route(net, skb,
27878caaf7b6SNicolas Dichtel 							 fl4->saddr, fl4->daddr,
27889f09eaeaSDavid Ahern 							 r, portid);
27892cf75070SNikolay Aleksandrov 
27908caaf7b6SNicolas Dichtel 				if (err <= 0) {
27918caaf7b6SNicolas Dichtel 					if (err == 0)
27928caaf7b6SNicolas Dichtel 						return 0;
27938caaf7b6SNicolas Dichtel 					goto nla_put_failure;
27948caaf7b6SNicolas Dichtel 				}
27958caaf7b6SNicolas Dichtel 			} else
27968caaf7b6SNicolas Dichtel #endif
2797404eb77eSRoopa Prabhu 				if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
2798f3756b79SDavid S. Miller 					goto nla_put_failure;
27991da177e4SLinus Torvalds 		}
2800*d948974cSStefano Brivio 	}
2801*d948974cSStefano Brivio 
2802*d948974cSStefano Brivio 	error = rt->dst.error;
28031da177e4SLinus Torvalds 
2804f185071dSDavid S. Miller 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
2805e3703b3dSThomas Graf 		goto nla_put_failure;
28061da177e4SLinus Torvalds 
2807053c095aSJohannes Berg 	nlmsg_end(skb, nlh);
2808053c095aSJohannes Berg 	return 0;
2809be403ea1SThomas Graf 
2810be403ea1SThomas Graf nla_put_failure:
281126932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
281226932566SPatrick McHardy 	return -EMSGSIZE;
28131da177e4SLinus Torvalds }
28141da177e4SLinus Torvalds 
2815404eb77eSRoopa Prabhu static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
2816404eb77eSRoopa Prabhu 						   u8 ip_proto, __be16 sport,
2817404eb77eSRoopa Prabhu 						   __be16 dport)
2818404eb77eSRoopa Prabhu {
2819404eb77eSRoopa Prabhu 	struct sk_buff *skb;
2820404eb77eSRoopa Prabhu 	struct iphdr *iph;
2821404eb77eSRoopa Prabhu 
2822404eb77eSRoopa Prabhu 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2823404eb77eSRoopa Prabhu 	if (!skb)
2824404eb77eSRoopa Prabhu 		return NULL;
2825404eb77eSRoopa Prabhu 
2826404eb77eSRoopa Prabhu 	/* Reserve room for dummy headers, this skb can pass
2827404eb77eSRoopa Prabhu 	 * through good chunk of routing engine.
2828404eb77eSRoopa Prabhu 	 */
2829404eb77eSRoopa Prabhu 	skb_reset_mac_header(skb);
2830404eb77eSRoopa Prabhu 	skb_reset_network_header(skb);
2831404eb77eSRoopa Prabhu 	skb->protocol = htons(ETH_P_IP);
2832404eb77eSRoopa Prabhu 	iph = skb_put(skb, sizeof(struct iphdr));
2833404eb77eSRoopa Prabhu 	iph->protocol = ip_proto;
2834404eb77eSRoopa Prabhu 	iph->saddr = src;
2835404eb77eSRoopa Prabhu 	iph->daddr = dst;
2836404eb77eSRoopa Prabhu 	iph->version = 0x4;
2837404eb77eSRoopa Prabhu 	iph->frag_off = 0;
2838404eb77eSRoopa Prabhu 	iph->ihl = 0x5;
2839404eb77eSRoopa Prabhu 	skb_set_transport_header(skb, skb->len);
2840404eb77eSRoopa Prabhu 
2841404eb77eSRoopa Prabhu 	switch (iph->protocol) {
2842404eb77eSRoopa Prabhu 	case IPPROTO_UDP: {
2843404eb77eSRoopa Prabhu 		struct udphdr *udph;
2844404eb77eSRoopa Prabhu 
2845404eb77eSRoopa Prabhu 		udph = skb_put_zero(skb, sizeof(struct udphdr));
2846404eb77eSRoopa Prabhu 		udph->source = sport;
2847404eb77eSRoopa Prabhu 		udph->dest = dport;
2848404eb77eSRoopa Prabhu 		udph->len = sizeof(struct udphdr);
2849404eb77eSRoopa Prabhu 		udph->check = 0;
2850404eb77eSRoopa Prabhu 		break;
2851404eb77eSRoopa Prabhu 	}
2852404eb77eSRoopa Prabhu 	case IPPROTO_TCP: {
2853404eb77eSRoopa Prabhu 		struct tcphdr *tcph;
2854404eb77eSRoopa Prabhu 
2855404eb77eSRoopa Prabhu 		tcph = skb_put_zero(skb, sizeof(struct tcphdr));
2856404eb77eSRoopa Prabhu 		tcph->source	= sport;
2857404eb77eSRoopa Prabhu 		tcph->dest	= dport;
2858404eb77eSRoopa Prabhu 		tcph->doff	= sizeof(struct tcphdr) / 4;
2859404eb77eSRoopa Prabhu 		tcph->rst = 1;
2860404eb77eSRoopa Prabhu 		tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
2861404eb77eSRoopa Prabhu 					    src, dst, 0);
2862404eb77eSRoopa Prabhu 		break;
2863404eb77eSRoopa Prabhu 	}
2864404eb77eSRoopa Prabhu 	case IPPROTO_ICMP: {
2865404eb77eSRoopa Prabhu 		struct icmphdr *icmph;
2866404eb77eSRoopa Prabhu 
2867404eb77eSRoopa Prabhu 		icmph = skb_put_zero(skb, sizeof(struct icmphdr));
2868404eb77eSRoopa Prabhu 		icmph->type = ICMP_ECHO;
2869404eb77eSRoopa Prabhu 		icmph->code = 0;
2870404eb77eSRoopa Prabhu 	}
2871404eb77eSRoopa Prabhu 	}
2872404eb77eSRoopa Prabhu 
2873404eb77eSRoopa Prabhu 	return skb;
2874404eb77eSRoopa Prabhu }
2875404eb77eSRoopa Prabhu 
2876a00302b6SJakub Kicinski static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
2877a00302b6SJakub Kicinski 				       const struct nlmsghdr *nlh,
2878a00302b6SJakub Kicinski 				       struct nlattr **tb,
2879a00302b6SJakub Kicinski 				       struct netlink_ext_ack *extack)
2880a00302b6SJakub Kicinski {
2881a00302b6SJakub Kicinski 	struct rtmsg *rtm;
2882a00302b6SJakub Kicinski 	int i, err;
2883a00302b6SJakub Kicinski 
2884a00302b6SJakub Kicinski 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
2885a00302b6SJakub Kicinski 		NL_SET_ERR_MSG(extack,
2886a00302b6SJakub Kicinski 			       "ipv4: Invalid header for route get request");
2887a00302b6SJakub Kicinski 		return -EINVAL;
2888a00302b6SJakub Kicinski 	}
2889a00302b6SJakub Kicinski 
2890a00302b6SJakub Kicinski 	if (!netlink_strict_get_check(skb))
28918cb08174SJohannes Berg 		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
2892a00302b6SJakub Kicinski 					      rtm_ipv4_policy, extack);
2893a00302b6SJakub Kicinski 
2894a00302b6SJakub Kicinski 	rtm = nlmsg_data(nlh);
2895a00302b6SJakub Kicinski 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
2896a00302b6SJakub Kicinski 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
2897a00302b6SJakub Kicinski 	    rtm->rtm_table || rtm->rtm_protocol ||
2898a00302b6SJakub Kicinski 	    rtm->rtm_scope || rtm->rtm_type) {
2899a00302b6SJakub Kicinski 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
2900a00302b6SJakub Kicinski 		return -EINVAL;
2901a00302b6SJakub Kicinski 	}
2902a00302b6SJakub Kicinski 
2903a00302b6SJakub Kicinski 	if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
2904a00302b6SJakub Kicinski 			       RTM_F_LOOKUP_TABLE |
2905a00302b6SJakub Kicinski 			       RTM_F_FIB_MATCH)) {
2906a00302b6SJakub Kicinski 		NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
2907a00302b6SJakub Kicinski 		return -EINVAL;
2908a00302b6SJakub Kicinski 	}
2909a00302b6SJakub Kicinski 
29108cb08174SJohannes Berg 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
2911a00302b6SJakub Kicinski 					    rtm_ipv4_policy, extack);
2912a00302b6SJakub Kicinski 	if (err)
2913a00302b6SJakub Kicinski 		return err;
2914a00302b6SJakub Kicinski 
2915a00302b6SJakub Kicinski 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2916a00302b6SJakub Kicinski 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2917a00302b6SJakub Kicinski 		NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
2918a00302b6SJakub Kicinski 		return -EINVAL;
2919a00302b6SJakub Kicinski 	}
2920a00302b6SJakub Kicinski 
2921a00302b6SJakub Kicinski 	for (i = 0; i <= RTA_MAX; i++) {
2922a00302b6SJakub Kicinski 		if (!tb[i])
2923a00302b6SJakub Kicinski 			continue;
2924a00302b6SJakub Kicinski 
2925a00302b6SJakub Kicinski 		switch (i) {
2926a00302b6SJakub Kicinski 		case RTA_IIF:
2927a00302b6SJakub Kicinski 		case RTA_OIF:
2928a00302b6SJakub Kicinski 		case RTA_SRC:
2929a00302b6SJakub Kicinski 		case RTA_DST:
2930a00302b6SJakub Kicinski 		case RTA_IP_PROTO:
2931a00302b6SJakub Kicinski 		case RTA_SPORT:
2932a00302b6SJakub Kicinski 		case RTA_DPORT:
2933a00302b6SJakub Kicinski 		case RTA_MARK:
2934a00302b6SJakub Kicinski 		case RTA_UID:
2935a00302b6SJakub Kicinski 			break;
2936a00302b6SJakub Kicinski 		default:
2937a00302b6SJakub Kicinski 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
2938a00302b6SJakub Kicinski 			return -EINVAL;
2939a00302b6SJakub Kicinski 		}
2940a00302b6SJakub Kicinski 	}
2941a00302b6SJakub Kicinski 
2942a00302b6SJakub Kicinski 	return 0;
2943a00302b6SJakub Kicinski }
2944a00302b6SJakub Kicinski 
2945c21ef3e3SDavid Ahern static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2946c21ef3e3SDavid Ahern 			     struct netlink_ext_ack *extack)
29471da177e4SLinus Torvalds {
29483b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
2949d889ce3bSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
2950404eb77eSRoopa Prabhu 	u32 table_id = RT_TABLE_MAIN;
2951404eb77eSRoopa Prabhu 	__be16 sport = 0, dport = 0;
29523765d35eSDavid Ahern 	struct fib_result res = {};
2953404eb77eSRoopa Prabhu 	u8 ip_proto = IPPROTO_UDP;
29541da177e4SLinus Torvalds 	struct rtable *rt = NULL;
2955404eb77eSRoopa Prabhu 	struct sk_buff *skb;
2956404eb77eSRoopa Prabhu 	struct rtmsg *rtm;
2957e8e3fbe9SMaciej Żenczykowski 	struct flowi4 fl4 = {};
29589e12bb22SAl Viro 	__be32 dst = 0;
29599e12bb22SAl Viro 	__be32 src = 0;
2960404eb77eSRoopa Prabhu 	kuid_t uid;
29619e12bb22SAl Viro 	u32 iif;
2962d889ce3bSThomas Graf 	int err;
2963963bfeeeSEric Dumazet 	int mark;
29641da177e4SLinus Torvalds 
2965a00302b6SJakub Kicinski 	err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2966d889ce3bSThomas Graf 	if (err < 0)
2967404eb77eSRoopa Prabhu 		return err;
2968d889ce3bSThomas Graf 
2969d889ce3bSThomas Graf 	rtm = nlmsg_data(nlh);
297067b61f6cSJiri Benc 	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
297167b61f6cSJiri Benc 	dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
2972d889ce3bSThomas Graf 	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
2973963bfeeeSEric Dumazet 	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
2974622ec2c9SLorenzo Colitti 	if (tb[RTA_UID])
2975622ec2c9SLorenzo Colitti 		uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
2976622ec2c9SLorenzo Colitti 	else
2977622ec2c9SLorenzo Colitti 		uid = (iif ? INVALID_UID : current_uid());
29781da177e4SLinus Torvalds 
2979404eb77eSRoopa Prabhu 	if (tb[RTA_IP_PROTO]) {
2980404eb77eSRoopa Prabhu 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
29815e1a99eaSHangbin Liu 						  &ip_proto, AF_INET, extack);
2982404eb77eSRoopa Prabhu 		if (err)
2983404eb77eSRoopa Prabhu 			return err;
2984404eb77eSRoopa Prabhu 	}
2985bbadb9a2SFlorian Larysch 
2986404eb77eSRoopa Prabhu 	if (tb[RTA_SPORT])
2987404eb77eSRoopa Prabhu 		sport = nla_get_be16(tb[RTA_SPORT]);
2988404eb77eSRoopa Prabhu 
2989404eb77eSRoopa Prabhu 	if (tb[RTA_DPORT])
2990404eb77eSRoopa Prabhu 		dport = nla_get_be16(tb[RTA_DPORT]);
2991404eb77eSRoopa Prabhu 
2992404eb77eSRoopa Prabhu 	skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
2993404eb77eSRoopa Prabhu 	if (!skb)
2994404eb77eSRoopa Prabhu 		return -ENOBUFS;
2995bbadb9a2SFlorian Larysch 
2996d6c0a4f6SDavid Miller 	fl4.daddr = dst;
2997d6c0a4f6SDavid Miller 	fl4.saddr = src;
2998d6c0a4f6SDavid Miller 	fl4.flowi4_tos = rtm->rtm_tos;
2999d6c0a4f6SDavid Miller 	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
3000d6c0a4f6SDavid Miller 	fl4.flowi4_mark = mark;
3001622ec2c9SLorenzo Colitti 	fl4.flowi4_uid = uid;
3002404eb77eSRoopa Prabhu 	if (sport)
3003404eb77eSRoopa Prabhu 		fl4.fl4_sport = sport;
3004404eb77eSRoopa Prabhu 	if (dport)
3005404eb77eSRoopa Prabhu 		fl4.fl4_dport = dport;
3006404eb77eSRoopa Prabhu 	fl4.flowi4_proto = ip_proto;
3007d6c0a4f6SDavid Miller 
30083765d35eSDavid Ahern 	rcu_read_lock();
30093765d35eSDavid Ahern 
30101da177e4SLinus Torvalds 	if (iif) {
3011d889ce3bSThomas Graf 		struct net_device *dev;
3012d889ce3bSThomas Graf 
30133765d35eSDavid Ahern 		dev = dev_get_by_index_rcu(net, iif);
301451456b29SIan Morris 		if (!dev) {
30151da177e4SLinus Torvalds 			err = -ENODEV;
3016404eb77eSRoopa Prabhu 			goto errout_rcu;
3017d889ce3bSThomas Graf 		}
3018d889ce3bSThomas Graf 
3019404eb77eSRoopa Prabhu 		fl4.flowi4_iif = iif; /* for rt_fill_info */
30201da177e4SLinus Torvalds 		skb->dev	= dev;
3021963bfeeeSEric Dumazet 		skb->mark	= mark;
30223765d35eSDavid Ahern 		err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
30233765d35eSDavid Ahern 					 dev, &res);
3024d889ce3bSThomas Graf 
3025511c3f92SEric Dumazet 		rt = skb_rtable(skb);
3026d8d1f30bSChangli Gao 		if (err == 0 && rt->dst.error)
3027d8d1f30bSChangli Gao 			err = -rt->dst.error;
30281da177e4SLinus Torvalds 	} else {
30296503a304SLorenzo Colitti 		fl4.flowi4_iif = LOOPBACK_IFINDEX;
303021f94775SIdo Schimmel 		skb->dev = net->loopback_dev;
30313765d35eSDavid Ahern 		rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
3032b23dd4feSDavid S. Miller 		err = 0;
3033b23dd4feSDavid S. Miller 		if (IS_ERR(rt))
3034b23dd4feSDavid S. Miller 			err = PTR_ERR(rt);
30352c87d63aSFlorian Westphal 		else
30362c87d63aSFlorian Westphal 			skb_dst_set(skb, &rt->dst);
30371da177e4SLinus Torvalds 	}
3038d889ce3bSThomas Graf 
30391da177e4SLinus Torvalds 	if (err)
3040404eb77eSRoopa Prabhu 		goto errout_rcu;
30411da177e4SLinus Torvalds 
30421da177e4SLinus Torvalds 	if (rtm->rtm_flags & RTM_F_NOTIFY)
30431da177e4SLinus Torvalds 		rt->rt_flags |= RTCF_NOTIFY;
30441da177e4SLinus Torvalds 
3045c36ba660SDavid Ahern 	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
304668e813aaSDavid Ahern 		table_id = res.table ? res.table->tb_id : 0;
3047c36ba660SDavid Ahern 
3048404eb77eSRoopa Prabhu 	/* reset skb for netlink reply msg */
3049404eb77eSRoopa Prabhu 	skb_trim(skb, 0);
3050404eb77eSRoopa Prabhu 	skb_reset_network_header(skb);
3051404eb77eSRoopa Prabhu 	skb_reset_transport_header(skb);
3052404eb77eSRoopa Prabhu 	skb_reset_mac_header(skb);
3053404eb77eSRoopa Prabhu 
3054bc3aae2bSRoopa Prabhu 	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
3055bc3aae2bSRoopa Prabhu 		if (!res.fi) {
3056bc3aae2bSRoopa Prabhu 			err = fib_props[res.type].error;
3057bc3aae2bSRoopa Prabhu 			if (!err)
3058bc3aae2bSRoopa Prabhu 				err = -EHOSTUNREACH;
3059404eb77eSRoopa Prabhu 			goto errout_rcu;
3060bc3aae2bSRoopa Prabhu 		}
3061b6179813SRoopa Prabhu 		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
3062b6179813SRoopa Prabhu 				    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
3063b6179813SRoopa Prabhu 				    rt->rt_type, res.prefix, res.prefixlen,
3064b6179813SRoopa Prabhu 				    fl4.flowi4_tos, res.fi, 0);
3065bc3aae2bSRoopa Prabhu 	} else {
3066404eb77eSRoopa Prabhu 		err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
3067ba52d61eSRoopa Prabhu 				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
3068bc3aae2bSRoopa Prabhu 	}
30697b46a644SDavid S. Miller 	if (err < 0)
3070404eb77eSRoopa Prabhu 		goto errout_rcu;
30711da177e4SLinus Torvalds 
30723765d35eSDavid Ahern 	rcu_read_unlock();
30733765d35eSDavid Ahern 
307415e47304SEric W. Biederman 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
30751da177e4SLinus Torvalds 
3076d889ce3bSThomas Graf errout_free:
3077404eb77eSRoopa Prabhu 	return err;
3078404eb77eSRoopa Prabhu errout_rcu:
30793765d35eSDavid Ahern 	rcu_read_unlock();
30801da177e4SLinus Torvalds 	kfree_skb(skb);
3081404eb77eSRoopa Prabhu 	goto errout_free;
30821da177e4SLinus Torvalds }
30831da177e4SLinus Torvalds 
30841da177e4SLinus Torvalds void ip_rt_multicast_event(struct in_device *in_dev)
30851da177e4SLinus Torvalds {
30864ccfe6d4SNicolas Dichtel 	rt_cache_flush(dev_net(in_dev->dev));
30871da177e4SLinus Torvalds }
30881da177e4SLinus Torvalds 
30891da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
3090082c7ca4SGao feng static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
3091082c7ca4SGao feng static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
3092082c7ca4SGao feng static int ip_rt_gc_elasticity __read_mostly	= 8;
3093773daa3cSArnd Bergmann static int ip_min_valid_pmtu __read_mostly	= IPV4_MIN_MTU;
3094082c7ca4SGao feng 
3095fe2c6338SJoe Perches static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
30968d65af78SAlexey Dobriyan 					void __user *buffer,
30971da177e4SLinus Torvalds 					size_t *lenp, loff_t *ppos)
30981da177e4SLinus Torvalds {
30995aad1de5STimo Teräs 	struct net *net = (struct net *)__ctl->extra1;
31005aad1de5STimo Teräs 
31011da177e4SLinus Torvalds 	if (write) {
31025aad1de5STimo Teräs 		rt_cache_flush(net);
31035aad1de5STimo Teräs 		fnhe_genid_bump(net);
31041da177e4SLinus Torvalds 		return 0;
31051da177e4SLinus Torvalds 	}
31061da177e4SLinus Torvalds 
31071da177e4SLinus Torvalds 	return -EINVAL;
31081da177e4SLinus Torvalds }
31091da177e4SLinus Torvalds 
3110fe2c6338SJoe Perches static struct ctl_table ipv4_route_table[] = {
31111da177e4SLinus Torvalds 	{
31121da177e4SLinus Torvalds 		.procname	= "gc_thresh",
31131da177e4SLinus Torvalds 		.data		= &ipv4_dst_ops.gc_thresh,
31141da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31151da177e4SLinus Torvalds 		.mode		= 0644,
31166d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
31171da177e4SLinus Torvalds 	},
31181da177e4SLinus Torvalds 	{
31191da177e4SLinus Torvalds 		.procname	= "max_size",
31201da177e4SLinus Torvalds 		.data		= &ip_rt_max_size,
31211da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31221da177e4SLinus Torvalds 		.mode		= 0644,
31236d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
31241da177e4SLinus Torvalds 	},
31251da177e4SLinus Torvalds 	{
31261da177e4SLinus Torvalds 		/*  Deprecated. Use gc_min_interval_ms */
31271da177e4SLinus Torvalds 
31281da177e4SLinus Torvalds 		.procname	= "gc_min_interval",
31291da177e4SLinus Torvalds 		.data		= &ip_rt_gc_min_interval,
31301da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31311da177e4SLinus Torvalds 		.mode		= 0644,
31326d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec_jiffies,
31331da177e4SLinus Torvalds 	},
31341da177e4SLinus Torvalds 	{
31351da177e4SLinus Torvalds 		.procname	= "gc_min_interval_ms",
31361da177e4SLinus Torvalds 		.data		= &ip_rt_gc_min_interval,
31371da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31381da177e4SLinus Torvalds 		.mode		= 0644,
31396d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec_ms_jiffies,
31401da177e4SLinus Torvalds 	},
31411da177e4SLinus Torvalds 	{
31421da177e4SLinus Torvalds 		.procname	= "gc_timeout",
31431da177e4SLinus Torvalds 		.data		= &ip_rt_gc_timeout,
31441da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31451da177e4SLinus Torvalds 		.mode		= 0644,
31466d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec_jiffies,
31471da177e4SLinus Torvalds 	},
31481da177e4SLinus Torvalds 	{
31499f28a2fcSEric Dumazet 		.procname	= "gc_interval",
31509f28a2fcSEric Dumazet 		.data		= &ip_rt_gc_interval,
31519f28a2fcSEric Dumazet 		.maxlen		= sizeof(int),
31529f28a2fcSEric Dumazet 		.mode		= 0644,
31539f28a2fcSEric Dumazet 		.proc_handler	= proc_dointvec_jiffies,
31549f28a2fcSEric Dumazet 	},
31559f28a2fcSEric Dumazet 	{
31561da177e4SLinus Torvalds 		.procname	= "redirect_load",
31571da177e4SLinus Torvalds 		.data		= &ip_rt_redirect_load,
31581da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31591da177e4SLinus Torvalds 		.mode		= 0644,
31606d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
31611da177e4SLinus Torvalds 	},
31621da177e4SLinus Torvalds 	{
31631da177e4SLinus Torvalds 		.procname	= "redirect_number",
31641da177e4SLinus Torvalds 		.data		= &ip_rt_redirect_number,
31651da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31661da177e4SLinus Torvalds 		.mode		= 0644,
31676d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
31681da177e4SLinus Torvalds 	},
31691da177e4SLinus Torvalds 	{
31701da177e4SLinus Torvalds 		.procname	= "redirect_silence",
31711da177e4SLinus Torvalds 		.data		= &ip_rt_redirect_silence,
31721da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31731da177e4SLinus Torvalds 		.mode		= 0644,
31746d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
31751da177e4SLinus Torvalds 	},
31761da177e4SLinus Torvalds 	{
31771da177e4SLinus Torvalds 		.procname	= "error_cost",
31781da177e4SLinus Torvalds 		.data		= &ip_rt_error_cost,
31791da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31801da177e4SLinus Torvalds 		.mode		= 0644,
31816d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
31821da177e4SLinus Torvalds 	},
31831da177e4SLinus Torvalds 	{
31841da177e4SLinus Torvalds 		.procname	= "error_burst",
31851da177e4SLinus Torvalds 		.data		= &ip_rt_error_burst,
31861da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31871da177e4SLinus Torvalds 		.mode		= 0644,
31886d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
31891da177e4SLinus Torvalds 	},
31901da177e4SLinus Torvalds 	{
31911da177e4SLinus Torvalds 		.procname	= "gc_elasticity",
31921da177e4SLinus Torvalds 		.data		= &ip_rt_gc_elasticity,
31931da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
31941da177e4SLinus Torvalds 		.mode		= 0644,
31956d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
31961da177e4SLinus Torvalds 	},
31971da177e4SLinus Torvalds 	{
31981da177e4SLinus Torvalds 		.procname	= "mtu_expires",
31991da177e4SLinus Torvalds 		.data		= &ip_rt_mtu_expires,
32001da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
32011da177e4SLinus Torvalds 		.mode		= 0644,
32026d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec_jiffies,
32031da177e4SLinus Torvalds 	},
32041da177e4SLinus Torvalds 	{
32051da177e4SLinus Torvalds 		.procname	= "min_pmtu",
32061da177e4SLinus Torvalds 		.data		= &ip_rt_min_pmtu,
32071da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
32081da177e4SLinus Torvalds 		.mode		= 0644,
3209c7272c2fSSabrina Dubroca 		.proc_handler	= proc_dointvec_minmax,
3210c7272c2fSSabrina Dubroca 		.extra1		= &ip_min_valid_pmtu,
32111da177e4SLinus Torvalds 	},
32121da177e4SLinus Torvalds 	{
32131da177e4SLinus Torvalds 		.procname	= "min_adv_mss",
32141da177e4SLinus Torvalds 		.data		= &ip_rt_min_advmss,
32151da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
32161da177e4SLinus Torvalds 		.mode		= 0644,
32176d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
32181da177e4SLinus Torvalds 	},
3219f8572d8fSEric W. Biederman 	{ }
32201da177e4SLinus Torvalds };
322139a23e75SDenis V. Lunev 
322239a23e75SDenis V. Lunev static struct ctl_table ipv4_route_flush_table[] = {
322339a23e75SDenis V. Lunev 	{
322439a23e75SDenis V. Lunev 		.procname	= "flush",
322539a23e75SDenis V. Lunev 		.maxlen		= sizeof(int),
322639a23e75SDenis V. Lunev 		.mode		= 0200,
32276d9f239aSAlexey Dobriyan 		.proc_handler	= ipv4_sysctl_rtcache_flush,
322839a23e75SDenis V. Lunev 	},
3229f8572d8fSEric W. Biederman 	{ },
323039a23e75SDenis V. Lunev };
323139a23e75SDenis V. Lunev 
323239a23e75SDenis V. Lunev static __net_init int sysctl_route_net_init(struct net *net)
323339a23e75SDenis V. Lunev {
323439a23e75SDenis V. Lunev 	struct ctl_table *tbl;
323539a23e75SDenis V. Lunev 
323639a23e75SDenis V. Lunev 	tbl = ipv4_route_flush_table;
323709ad9bc7SOctavian Purdila 	if (!net_eq(net, &init_net)) {
323839a23e75SDenis V. Lunev 		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
323951456b29SIan Morris 		if (!tbl)
324039a23e75SDenis V. Lunev 			goto err_dup;
3241464dc801SEric W. Biederman 
3242464dc801SEric W. Biederman 		/* Don't export sysctls to unprivileged users */
3243464dc801SEric W. Biederman 		if (net->user_ns != &init_user_ns)
3244464dc801SEric W. Biederman 			tbl[0].procname = NULL;
324539a23e75SDenis V. Lunev 	}
324639a23e75SDenis V. Lunev 	tbl[0].extra1 = net;
324739a23e75SDenis V. Lunev 
3248ec8f23ceSEric W. Biederman 	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
324951456b29SIan Morris 	if (!net->ipv4.route_hdr)
325039a23e75SDenis V. Lunev 		goto err_reg;
325139a23e75SDenis V. Lunev 	return 0;
325239a23e75SDenis V. Lunev 
325339a23e75SDenis V. Lunev err_reg:
325439a23e75SDenis V. Lunev 	if (tbl != ipv4_route_flush_table)
325539a23e75SDenis V. Lunev 		kfree(tbl);
325639a23e75SDenis V. Lunev err_dup:
325739a23e75SDenis V. Lunev 	return -ENOMEM;
325839a23e75SDenis V. Lunev }
325939a23e75SDenis V. Lunev 
326039a23e75SDenis V. Lunev static __net_exit void sysctl_route_net_exit(struct net *net)
326139a23e75SDenis V. Lunev {
326239a23e75SDenis V. Lunev 	struct ctl_table *tbl;
326339a23e75SDenis V. Lunev 
326439a23e75SDenis V. Lunev 	tbl = net->ipv4.route_hdr->ctl_table_arg;
326539a23e75SDenis V. Lunev 	unregister_net_sysctl_table(net->ipv4.route_hdr);
326639a23e75SDenis V. Lunev 	BUG_ON(tbl == ipv4_route_flush_table);
326739a23e75SDenis V. Lunev 	kfree(tbl);
326839a23e75SDenis V. Lunev }
326939a23e75SDenis V. Lunev 
327039a23e75SDenis V. Lunev static __net_initdata struct pernet_operations sysctl_route_ops = {
327139a23e75SDenis V. Lunev 	.init = sysctl_route_net_init,
327239a23e75SDenis V. Lunev 	.exit = sysctl_route_net_exit,
327339a23e75SDenis V. Lunev };
32741da177e4SLinus Torvalds #endif
32751da177e4SLinus Torvalds 
32763ee94372SNeil Horman static __net_init int rt_genid_init(struct net *net)
32779f5e97e5SDenis V. Lunev {
3278ca4c3fc2Sfan.du 	atomic_set(&net->ipv4.rt_genid, 0);
32795aad1de5STimo Teräs 	atomic_set(&net->fnhe_genid, 0);
32807aed9f72SJason A. Donenfeld 	atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
32819f5e97e5SDenis V. Lunev 	return 0;
32829f5e97e5SDenis V. Lunev }
32839f5e97e5SDenis V. Lunev 
32843ee94372SNeil Horman static __net_initdata struct pernet_operations rt_genid_ops = {
32853ee94372SNeil Horman 	.init = rt_genid_init,
32869f5e97e5SDenis V. Lunev };
32879f5e97e5SDenis V. Lunev 
3288c3426b47SDavid S. Miller static int __net_init ipv4_inetpeer_init(struct net *net)
3289c3426b47SDavid S. Miller {
3290c3426b47SDavid S. Miller 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3291c3426b47SDavid S. Miller 
3292c3426b47SDavid S. Miller 	if (!bp)
3293c3426b47SDavid S. Miller 		return -ENOMEM;
3294c3426b47SDavid S. Miller 	inet_peer_base_init(bp);
3295c3426b47SDavid S. Miller 	net->ipv4.peers = bp;
3296c3426b47SDavid S. Miller 	return 0;
3297c3426b47SDavid S. Miller }
3298c3426b47SDavid S. Miller 
3299c3426b47SDavid S. Miller static void __net_exit ipv4_inetpeer_exit(struct net *net)
3300c3426b47SDavid S. Miller {
3301c3426b47SDavid S. Miller 	struct inet_peer_base *bp = net->ipv4.peers;
3302c3426b47SDavid S. Miller 
3303c3426b47SDavid S. Miller 	net->ipv4.peers = NULL;
330456a6b248SDavid S. Miller 	inetpeer_invalidate_tree(bp);
3305c3426b47SDavid S. Miller 	kfree(bp);
3306c3426b47SDavid S. Miller }
3307c3426b47SDavid S. Miller 
3308c3426b47SDavid S. Miller static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
3309c3426b47SDavid S. Miller 	.init	=	ipv4_inetpeer_init,
3310c3426b47SDavid S. Miller 	.exit	=	ipv4_inetpeer_exit,
3311c3426b47SDavid S. Miller };
33129f5e97e5SDenis V. Lunev 
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu accounting area; allocated (256 entries) in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
33161da177e4SLinus Torvalds 
33171da177e4SLinus Torvalds int __init ip_rt_init(void)
33181da177e4SLinus Torvalds {
33195055c371SEric Dumazet 	int cpu;
33201da177e4SLinus Torvalds 
33216da2ec56SKees Cook 	ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
33226da2ec56SKees Cook 				  GFP_KERNEL);
332373f156a6SEric Dumazet 	if (!ip_idents)
332473f156a6SEric Dumazet 		panic("IP: failed to allocate ip_idents\n");
332573f156a6SEric Dumazet 
332673f156a6SEric Dumazet 	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
332773f156a6SEric Dumazet 
3328355b590cSEric Dumazet 	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
3329355b590cSEric Dumazet 	if (!ip_tstamps)
3330355b590cSEric Dumazet 		panic("IP: failed to allocate ip_tstamps\n");
3331355b590cSEric Dumazet 
33325055c371SEric Dumazet 	for_each_possible_cpu(cpu) {
33335055c371SEric Dumazet 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
33345055c371SEric Dumazet 
33355055c371SEric Dumazet 		INIT_LIST_HEAD(&ul->head);
33365055c371SEric Dumazet 		spin_lock_init(&ul->lock);
33375055c371SEric Dumazet 	}
3338c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
33390dcec8c2SIngo Molnar 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
33401da177e4SLinus Torvalds 	if (!ip_rt_acct)
33411da177e4SLinus Torvalds 		panic("IP: failed to allocate ip_rt_acct\n");
33421da177e4SLinus Torvalds #endif
33431da177e4SLinus Torvalds 
3344e5d679f3SAlexey Dobriyan 	ipv4_dst_ops.kmem_cachep =
3345e5d679f3SAlexey Dobriyan 		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
334620c2df83SPaul Mundt 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
33471da177e4SLinus Torvalds 
334814e50e57SDavid S. Miller 	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
334914e50e57SDavid S. Miller 
3350fc66f95cSEric Dumazet 	if (dst_entries_init(&ipv4_dst_ops) < 0)
3351fc66f95cSEric Dumazet 		panic("IP: failed to allocate ipv4_dst_ops counter\n");
3352fc66f95cSEric Dumazet 
3353fc66f95cSEric Dumazet 	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
3354fc66f95cSEric Dumazet 		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
3355fc66f95cSEric Dumazet 
335689aef892SDavid S. Miller 	ipv4_dst_ops.gc_thresh = ~0;
335789aef892SDavid S. Miller 	ip_rt_max_size = INT_MAX;
33581da177e4SLinus Torvalds 
33591da177e4SLinus Torvalds 	devinet_init();
33601da177e4SLinus Torvalds 	ip_fib_init();
33611da177e4SLinus Torvalds 
336273b38711SDenis V. Lunev 	if (ip_rt_proc_init())
3363058bd4d2SJoe Perches 		pr_err("Unable to create route proc files\n");
33641da177e4SLinus Torvalds #ifdef CONFIG_XFRM
33651da177e4SLinus Torvalds 	xfrm_init();
3366703fb94eSSteffen Klassert 	xfrm4_init();
33671da177e4SLinus Torvalds #endif
3368394f51abSFlorian Westphal 	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
3369394f51abSFlorian Westphal 		      RTNL_FLAG_DOIT_UNLOCKED);
337063f3444fSThomas Graf 
337139a23e75SDenis V. Lunev #ifdef CONFIG_SYSCTL
337239a23e75SDenis V. Lunev 	register_pernet_subsys(&sysctl_route_ops);
337339a23e75SDenis V. Lunev #endif
33743ee94372SNeil Horman 	register_pernet_subsys(&rt_genid_ops);
3375c3426b47SDavid S. Miller 	register_pernet_subsys(&ipv4_inetpeer_ops);
33761bcdca3fSTim Hansen 	return 0;
33771da177e4SLinus Torvalds }
33781da177e4SLinus Torvalds 
3379a1bc6eb4SAl Viro #ifdef CONFIG_SYSCTL
3380eeb61f71SAl Viro /*
3381eeb61f71SAl Viro  * We really need to sanitize the damn ipv4 init order, then all
3382eeb61f71SAl Viro  * this nonsense will go away.
3383eeb61f71SAl Viro  */
3384eeb61f71SAl Viro void __init ip_static_sysctl_init(void)
3385eeb61f71SAl Viro {
33864e5ca785SEric W. Biederman 	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
3387eeb61f71SAl Viro }
3388a1bc6eb4SAl Viro #endif
3389