xref: /linux/net/ipv4/route.c (revision 5cbf777cfdf6e5a7b7149006e4881a255da78fdd)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		ROUTE - implementation of the IP router.
71da177e4SLinus Torvalds  *
802c30a84SJesper Juhl  * Authors:	Ross Biro
91da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
101da177e4SLinus Torvalds  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
111da177e4SLinus Torvalds  *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
121da177e4SLinus Torvalds  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
131da177e4SLinus Torvalds  *
141da177e4SLinus Torvalds  * Fixes:
151da177e4SLinus Torvalds  *		Alan Cox	:	Verify area fixes.
161da177e4SLinus Torvalds  *		Alan Cox	:	cli() protects routing changes
171da177e4SLinus Torvalds  *		Rui Oliveira	:	ICMP routing table updates
181da177e4SLinus Torvalds  *		(rco@di.uminho.pt)	Routing table insertion and update
191da177e4SLinus Torvalds  *		Linus Torvalds	:	Rewrote bits to be sensible
201da177e4SLinus Torvalds  *		Alan Cox	:	Added BSD route gw semantics
211da177e4SLinus Torvalds  *		Alan Cox	:	Super /proc >4K
221da177e4SLinus Torvalds  *		Alan Cox	:	MTU in route table
231da177e4SLinus Torvalds  *		Alan Cox	: 	MSS actually. Also added the window
241da177e4SLinus Torvalds  *					clamper.
251da177e4SLinus Torvalds  *		Sam Lantinga	:	Fixed route matching in rt_del()
261da177e4SLinus Torvalds  *		Alan Cox	:	Routing cache support.
271da177e4SLinus Torvalds  *		Alan Cox	:	Removed compatibility cruft.
281da177e4SLinus Torvalds  *		Alan Cox	:	RTF_REJECT support.
291da177e4SLinus Torvalds  *		Alan Cox	:	TCP irtt support.
301da177e4SLinus Torvalds  *		Jonathan Naylor	:	Added Metric support.
311da177e4SLinus Torvalds  *	Miquel van Smoorenburg	:	BSD API fixes.
321da177e4SLinus Torvalds  *	Miquel van Smoorenburg	:	Metrics.
331da177e4SLinus Torvalds  *		Alan Cox	:	Use __u32 properly
341da177e4SLinus Torvalds  *		Alan Cox	:	Aligned routing errors more closely with BSD
351da177e4SLinus Torvalds  *					our system is still very different.
361da177e4SLinus Torvalds  *		Alan Cox	:	Faster /proc handling
371da177e4SLinus Torvalds  *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
381da177e4SLinus Torvalds  *					routing caches and better behaviour.
391da177e4SLinus Torvalds  *
401da177e4SLinus Torvalds  *		Olaf Erb	:	irtt wasn't being copied right.
411da177e4SLinus Torvalds  *		Bjorn Ekwall	:	Kerneld route support.
421da177e4SLinus Torvalds  *		Alan Cox	:	Multicast fixed (I hope)
431da177e4SLinus Torvalds  * 		Pavel Krauz	:	Limited broadcast fixed
441da177e4SLinus Torvalds  *		Mike McLagan	:	Routing by source
451da177e4SLinus Torvalds  *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
461da177e4SLinus Torvalds  *					route.c and rewritten from scratch.
471da177e4SLinus Torvalds  *		Andi Kleen	:	Load-limit warning messages.
481da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
491da177e4SLinus Torvalds  *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
501da177e4SLinus Torvalds  *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
511da177e4SLinus Torvalds  *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
521da177e4SLinus Torvalds  *		Marc Boucher	:	routing by fwmark
531da177e4SLinus Torvalds  *	Robert Olsson		:	Added rt_cache statistics
541da177e4SLinus Torvalds  *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
55bb1d23b0SEric Dumazet  *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
56cef2685eSIlia Sotnikov  * 	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
57cef2685eSIlia Sotnikov  * 	Ilia Sotnikov		:	Removed TOS from hash calculations
581da177e4SLinus Torvalds  *
591da177e4SLinus Torvalds  *		This program is free software; you can redistribute it and/or
601da177e4SLinus Torvalds  *		modify it under the terms of the GNU General Public License
611da177e4SLinus Torvalds  *		as published by the Free Software Foundation; either version
621da177e4SLinus Torvalds  *		2 of the License, or (at your option) any later version.
631da177e4SLinus Torvalds  */
641da177e4SLinus Torvalds 
/* Prefix put in front of the format string of this file's pr_*() messages. */
#define pr_fmt(fmt) "IPv4: " fmt
66afd46503SJoe Perches 
671da177e4SLinus Torvalds #include <linux/module.h>
687c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
691da177e4SLinus Torvalds #include <linux/bitops.h>
701da177e4SLinus Torvalds #include <linux/types.h>
711da177e4SLinus Torvalds #include <linux/kernel.h>
721da177e4SLinus Torvalds #include <linux/mm.h>
731da177e4SLinus Torvalds #include <linux/string.h>
741da177e4SLinus Torvalds #include <linux/socket.h>
751da177e4SLinus Torvalds #include <linux/sockios.h>
761da177e4SLinus Torvalds #include <linux/errno.h>
771da177e4SLinus Torvalds #include <linux/in.h>
781da177e4SLinus Torvalds #include <linux/inet.h>
791da177e4SLinus Torvalds #include <linux/netdevice.h>
801da177e4SLinus Torvalds #include <linux/proc_fs.h>
811da177e4SLinus Torvalds #include <linux/init.h>
821da177e4SLinus Torvalds #include <linux/skbuff.h>
831da177e4SLinus Torvalds #include <linux/inetdevice.h>
841da177e4SLinus Torvalds #include <linux/igmp.h>
851da177e4SLinus Torvalds #include <linux/pkt_sched.h>
861da177e4SLinus Torvalds #include <linux/mroute.h>
871da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
881da177e4SLinus Torvalds #include <linux/random.h>
891da177e4SLinus Torvalds #include <linux/rcupdate.h>
901da177e4SLinus Torvalds #include <linux/times.h>
915a0e3ad6STejun Heo #include <linux/slab.h>
9273f156a6SEric Dumazet #include <linux/jhash.h>
93352e512cSHerbert Xu #include <net/dst.h>
941b7179d3SThomas Graf #include <net/dst_metadata.h>
95457c4cbcSEric W. Biederman #include <net/net_namespace.h>
961da177e4SLinus Torvalds #include <net/protocol.h>
971da177e4SLinus Torvalds #include <net/ip.h>
981da177e4SLinus Torvalds #include <net/route.h>
991da177e4SLinus Torvalds #include <net/inetpeer.h>
1001da177e4SLinus Torvalds #include <net/sock.h>
1011da177e4SLinus Torvalds #include <net/ip_fib.h>
1021da177e4SLinus Torvalds #include <net/arp.h>
1031da177e4SLinus Torvalds #include <net/tcp.h>
1041da177e4SLinus Torvalds #include <net/icmp.h>
1051da177e4SLinus Torvalds #include <net/xfrm.h>
106571e7226SRoopa Prabhu #include <net/lwtunnel.h>
1078d71740cSTom Tucker #include <net/netevent.h>
10863f3444fSThomas Graf #include <net/rtnetlink.h>
1091da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
1101da177e4SLinus Torvalds #include <linux/sysctl.h>
1111da177e4SLinus Torvalds #endif
1126e5714eaSDavid S. Miller #include <net/secure_seq.h>
1131b7179d3SThomas Graf #include <net/ip_tunnels.h>
114385add90SDavid Ahern #include <net/l3mdev.h>
1151da177e4SLinus Torvalds 
116b6179813SRoopa Prabhu #include "fib_lookup.h"
117b6179813SRoopa Prabhu 
/*
 * Keep only the IPTOS_RT_MASK and RTO_ONLINK bits of the flowi4_tos field
 * of the flow key that @oldflp4 points to.
 */
#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

/* Initial value of ip_rt_gc_timeout: 300 seconds expressed in jiffies. */
#define RT_GC_TIMEOUT (300 * HZ)
1221da177e4SLinus Torvalds 
1231da177e4SLinus Torvalds static int ip_rt_max_size;
124817bc4dbSStephen Hemminger static int ip_rt_redirect_number __read_mostly	= 9;
125817bc4dbSStephen Hemminger static int ip_rt_redirect_load __read_mostly	= HZ / 50;
126817bc4dbSStephen Hemminger static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
127817bc4dbSStephen Hemminger static int ip_rt_error_cost __read_mostly	= HZ;
128817bc4dbSStephen Hemminger static int ip_rt_error_burst __read_mostly	= 5 * HZ;
129817bc4dbSStephen Hemminger static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
130c7272c2fSSabrina Dubroca static u32 ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
131817bc4dbSStephen Hemminger static int ip_rt_min_advmss __read_mostly	= 256;
1329f28a2fcSEric Dumazet 
133deed49dfSXin Long static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
134c7272c2fSSabrina Dubroca 
1351da177e4SLinus Torvalds /*
1361da177e4SLinus Torvalds  *	Interface to generic destination cache.
1371da177e4SLinus Torvalds  */
1381da177e4SLinus Torvalds 
1391da177e4SLinus Torvalds static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
1400dbaee3bSDavid S. Miller static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
141ebb762f2SSteffen Klassert static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
1421da177e4SLinus Torvalds static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
1431da177e4SLinus Torvalds static void		 ipv4_link_failure(struct sk_buff *skb);
1446700c270SDavid S. Miller static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1456700c270SDavid S. Miller 					   struct sk_buff *skb, u32 mtu);
1466700c270SDavid S. Miller static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
1476700c270SDavid S. Miller 					struct sk_buff *skb);
148caacf05eSDavid S. Miller static void		ipv4_dst_destroy(struct dst_entry *dst);
1491da177e4SLinus Torvalds 
15062fa8a84SDavid S. Miller static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
15162fa8a84SDavid S. Miller {
15231248731SDavid S. Miller 	WARN_ON(1);
15331248731SDavid S. Miller 	return NULL;
15462fa8a84SDavid S. Miller }
15562fa8a84SDavid S. Miller 
156f894cbf8SDavid S. Miller static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
157f894cbf8SDavid S. Miller 					   struct sk_buff *skb,
158f894cbf8SDavid S. Miller 					   const void *daddr);
15963fca65dSJulian Anastasov static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);
160d3aaeb38SDavid S. Miller 
1611da177e4SLinus Torvalds static struct dst_ops ipv4_dst_ops = {
1621da177e4SLinus Torvalds 	.family =		AF_INET,
1631da177e4SLinus Torvalds 	.check =		ipv4_dst_check,
1640dbaee3bSDavid S. Miller 	.default_advmss =	ipv4_default_advmss,
165ebb762f2SSteffen Klassert 	.mtu =			ipv4_mtu,
16662fa8a84SDavid S. Miller 	.cow_metrics =		ipv4_cow_metrics,
167caacf05eSDavid S. Miller 	.destroy =		ipv4_dst_destroy,
1681da177e4SLinus Torvalds 	.negative_advice =	ipv4_negative_advice,
1691da177e4SLinus Torvalds 	.link_failure =		ipv4_link_failure,
1701da177e4SLinus Torvalds 	.update_pmtu =		ip_rt_update_pmtu,
171e47a185bSDavid S. Miller 	.redirect =		ip_do_redirect,
172b92dacd4SEric W. Biederman 	.local_out =		__ip_local_out,
173d3aaeb38SDavid S. Miller 	.neigh_lookup =		ipv4_neigh_lookup,
17463fca65dSJulian Anastasov 	.confirm_neigh =	ipv4_confirm_neigh,
1751da177e4SLinus Torvalds };
1761da177e4SLinus Torvalds 
1771da177e4SLinus Torvalds #define ECN_OR_COST(class)	TC_PRIO_##class
1781da177e4SLinus Torvalds 
1794839c52bSPhilippe De Muyter const __u8 ip_tos2prio[16] = {
1801da177e4SLinus Torvalds 	TC_PRIO_BESTEFFORT,
1814a2b9c37SDan Siemon 	ECN_OR_COST(BESTEFFORT),
1821da177e4SLinus Torvalds 	TC_PRIO_BESTEFFORT,
1831da177e4SLinus Torvalds 	ECN_OR_COST(BESTEFFORT),
1841da177e4SLinus Torvalds 	TC_PRIO_BULK,
1851da177e4SLinus Torvalds 	ECN_OR_COST(BULK),
1861da177e4SLinus Torvalds 	TC_PRIO_BULK,
1871da177e4SLinus Torvalds 	ECN_OR_COST(BULK),
1881da177e4SLinus Torvalds 	TC_PRIO_INTERACTIVE,
1891da177e4SLinus Torvalds 	ECN_OR_COST(INTERACTIVE),
1901da177e4SLinus Torvalds 	TC_PRIO_INTERACTIVE,
1911da177e4SLinus Torvalds 	ECN_OR_COST(INTERACTIVE),
1921da177e4SLinus Torvalds 	TC_PRIO_INTERACTIVE_BULK,
1931da177e4SLinus Torvalds 	ECN_OR_COST(INTERACTIVE_BULK),
1941da177e4SLinus Torvalds 	TC_PRIO_INTERACTIVE_BULK,
1951da177e4SLinus Torvalds 	ECN_OR_COST(INTERACTIVE_BULK)
1961da177e4SLinus Torvalds };
197d4a96865SAmir Vadai EXPORT_SYMBOL(ip_tos2prio);
1981da177e4SLinus Torvalds 
1992f970d83SEric Dumazet static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
2003ed66e91SChristoph Lameter #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
2011da177e4SLinus Torvalds 
2021da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
2031da177e4SLinus Torvalds static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
2041da177e4SLinus Torvalds {
20529e75252SEric Dumazet 	if (*pos)
20689aef892SDavid S. Miller 		return NULL;
20729e75252SEric Dumazet 	return SEQ_START_TOKEN;
2081da177e4SLinus Torvalds }
2091da177e4SLinus Torvalds 
2101da177e4SLinus Torvalds static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2111da177e4SLinus Torvalds {
2121da177e4SLinus Torvalds 	++*pos;
21389aef892SDavid S. Miller 	return NULL;
2141da177e4SLinus Torvalds }
2151da177e4SLinus Torvalds 
/* seq_file .stop for the "rt_cache" entry in net->proc_net. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
	/* Nothing to undo: .start takes no lock or reference. */
}
2191da177e4SLinus Torvalds 
2201da177e4SLinus Torvalds static int rt_cache_seq_show(struct seq_file *seq, void *v)
2211da177e4SLinus Torvalds {
2221da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN)
2231da177e4SLinus Torvalds 		seq_printf(seq, "%-127s\n",
2241da177e4SLinus Torvalds 			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
2251da177e4SLinus Torvalds 			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
2261da177e4SLinus Torvalds 			   "HHUptod\tSpecDst");
2271da177e4SLinus Torvalds 	return 0;
2281da177e4SLinus Torvalds }
2291da177e4SLinus Torvalds 
230f690808eSStephen Hemminger static const struct seq_operations rt_cache_seq_ops = {
2311da177e4SLinus Torvalds 	.start  = rt_cache_seq_start,
2321da177e4SLinus Torvalds 	.next   = rt_cache_seq_next,
2331da177e4SLinus Torvalds 	.stop   = rt_cache_seq_stop,
2341da177e4SLinus Torvalds 	.show   = rt_cache_seq_show,
2351da177e4SLinus Torvalds };
2361da177e4SLinus Torvalds 
2371da177e4SLinus Torvalds static int rt_cache_seq_open(struct inode *inode, struct file *file)
2381da177e4SLinus Torvalds {
23989aef892SDavid S. Miller 	return seq_open(file, &rt_cache_seq_ops);
2401da177e4SLinus Torvalds }
2411da177e4SLinus Torvalds 
2429a32144eSArjan van de Ven static const struct file_operations rt_cache_seq_fops = {
2431da177e4SLinus Torvalds 	.open	 = rt_cache_seq_open,
2441da177e4SLinus Torvalds 	.read	 = seq_read,
2451da177e4SLinus Torvalds 	.llseek	 = seq_lseek,
24689aef892SDavid S. Miller 	.release = seq_release,
2471da177e4SLinus Torvalds };
2481da177e4SLinus Torvalds 
2491da177e4SLinus Torvalds 
2501da177e4SLinus Torvalds static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
2511da177e4SLinus Torvalds {
2521da177e4SLinus Torvalds 	int cpu;
2531da177e4SLinus Torvalds 
2541da177e4SLinus Torvalds 	if (*pos == 0)
2551da177e4SLinus Torvalds 		return SEQ_START_TOKEN;
2561da177e4SLinus Torvalds 
2570f23174aSRusty Russell 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2581da177e4SLinus Torvalds 		if (!cpu_possible(cpu))
2591da177e4SLinus Torvalds 			continue;
2601da177e4SLinus Torvalds 		*pos = cpu+1;
2612f970d83SEric Dumazet 		return &per_cpu(rt_cache_stat, cpu);
2621da177e4SLinus Torvalds 	}
2631da177e4SLinus Torvalds 	return NULL;
2641da177e4SLinus Torvalds }
2651da177e4SLinus Torvalds 
2661da177e4SLinus Torvalds static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2671da177e4SLinus Torvalds {
2681da177e4SLinus Torvalds 	int cpu;
2691da177e4SLinus Torvalds 
2700f23174aSRusty Russell 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2711da177e4SLinus Torvalds 		if (!cpu_possible(cpu))
2721da177e4SLinus Torvalds 			continue;
2731da177e4SLinus Torvalds 		*pos = cpu+1;
2742f970d83SEric Dumazet 		return &per_cpu(rt_cache_stat, cpu);
2751da177e4SLinus Torvalds 	}
2761da177e4SLinus Torvalds 	return NULL;
2771da177e4SLinus Torvalds 
2781da177e4SLinus Torvalds }
2791da177e4SLinus Torvalds 
/* seq_file .stop for the "rt_cache" entry in net->proc_net_stat. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
	/* Nothing to undo: .start/.next take no lock or reference. */
}
2841da177e4SLinus Torvalds 
2851da177e4SLinus Torvalds static int rt_cpu_seq_show(struct seq_file *seq, void *v)
2861da177e4SLinus Torvalds {
2871da177e4SLinus Torvalds 	struct rt_cache_stat *st = v;
2881da177e4SLinus Torvalds 
2891da177e4SLinus Torvalds 	if (v == SEQ_START_TOKEN) {
2905bec0039SOlaf Rempel 		seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
2911da177e4SLinus Torvalds 		return 0;
2921da177e4SLinus Torvalds 	}
2931da177e4SLinus Torvalds 
2941da177e4SLinus Torvalds 	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
2951da177e4SLinus Torvalds 		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
296fc66f95cSEric Dumazet 		   dst_entries_get_slow(&ipv4_dst_ops),
2970baf2b35SEric Dumazet 		   0, /* st->in_hit */
2981da177e4SLinus Torvalds 		   st->in_slow_tot,
2991da177e4SLinus Torvalds 		   st->in_slow_mc,
3001da177e4SLinus Torvalds 		   st->in_no_route,
3011da177e4SLinus Torvalds 		   st->in_brd,
3021da177e4SLinus Torvalds 		   st->in_martian_dst,
3031da177e4SLinus Torvalds 		   st->in_martian_src,
3041da177e4SLinus Torvalds 
3050baf2b35SEric Dumazet 		   0, /* st->out_hit */
3061da177e4SLinus Torvalds 		   st->out_slow_tot,
3071da177e4SLinus Torvalds 		   st->out_slow_mc,
3081da177e4SLinus Torvalds 
3090baf2b35SEric Dumazet 		   0, /* st->gc_total */
3100baf2b35SEric Dumazet 		   0, /* st->gc_ignored */
3110baf2b35SEric Dumazet 		   0, /* st->gc_goal_miss */
3120baf2b35SEric Dumazet 		   0, /* st->gc_dst_overflow */
3130baf2b35SEric Dumazet 		   0, /* st->in_hlist_search */
3140baf2b35SEric Dumazet 		   0  /* st->out_hlist_search */
3151da177e4SLinus Torvalds 		);
3161da177e4SLinus Torvalds 	return 0;
3171da177e4SLinus Torvalds }
3181da177e4SLinus Torvalds 
319f690808eSStephen Hemminger static const struct seq_operations rt_cpu_seq_ops = {
3201da177e4SLinus Torvalds 	.start  = rt_cpu_seq_start,
3211da177e4SLinus Torvalds 	.next   = rt_cpu_seq_next,
3221da177e4SLinus Torvalds 	.stop   = rt_cpu_seq_stop,
3231da177e4SLinus Torvalds 	.show   = rt_cpu_seq_show,
3241da177e4SLinus Torvalds };
3251da177e4SLinus Torvalds 
3261da177e4SLinus Torvalds 
3271da177e4SLinus Torvalds static int rt_cpu_seq_open(struct inode *inode, struct file *file)
3281da177e4SLinus Torvalds {
3291da177e4SLinus Torvalds 	return seq_open(file, &rt_cpu_seq_ops);
3301da177e4SLinus Torvalds }
3311da177e4SLinus Torvalds 
3329a32144eSArjan van de Ven static const struct file_operations rt_cpu_seq_fops = {
3331da177e4SLinus Torvalds 	.open	 = rt_cpu_seq_open,
3341da177e4SLinus Torvalds 	.read	 = seq_read,
3351da177e4SLinus Torvalds 	.llseek	 = seq_lseek,
3361da177e4SLinus Torvalds 	.release = seq_release,
3371da177e4SLinus Torvalds };
3381da177e4SLinus Torvalds 
339c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
340a661c419SAlexey Dobriyan static int rt_acct_proc_show(struct seq_file *m, void *v)
34178c686e9SPavel Emelyanov {
342a661c419SAlexey Dobriyan 	struct ip_rt_acct *dst, *src;
343a661c419SAlexey Dobriyan 	unsigned int i, j;
34478c686e9SPavel Emelyanov 
345a661c419SAlexey Dobriyan 	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
346a661c419SAlexey Dobriyan 	if (!dst)
347a661c419SAlexey Dobriyan 		return -ENOMEM;
34878c686e9SPavel Emelyanov 
349a661c419SAlexey Dobriyan 	for_each_possible_cpu(i) {
350a661c419SAlexey Dobriyan 		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
351a661c419SAlexey Dobriyan 		for (j = 0; j < 256; j++) {
352a661c419SAlexey Dobriyan 			dst[j].o_bytes   += src[j].o_bytes;
353a661c419SAlexey Dobriyan 			dst[j].o_packets += src[j].o_packets;
354a661c419SAlexey Dobriyan 			dst[j].i_bytes   += src[j].i_bytes;
355a661c419SAlexey Dobriyan 			dst[j].i_packets += src[j].i_packets;
356a661c419SAlexey Dobriyan 		}
357a661c419SAlexey Dobriyan 	}
358a661c419SAlexey Dobriyan 
359a661c419SAlexey Dobriyan 	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
360a661c419SAlexey Dobriyan 	kfree(dst);
36178c686e9SPavel Emelyanov 	return 0;
36278c686e9SPavel Emelyanov }
36378c686e9SPavel Emelyanov #endif
364107f1634SPavel Emelyanov 
36573b38711SDenis V. Lunev static int __net_init ip_rt_do_proc_init(struct net *net)
366107f1634SPavel Emelyanov {
367107f1634SPavel Emelyanov 	struct proc_dir_entry *pde;
368107f1634SPavel Emelyanov 
369d6444062SJoe Perches 	pde = proc_create("rt_cache", 0444, net->proc_net,
370107f1634SPavel Emelyanov 			  &rt_cache_seq_fops);
371107f1634SPavel Emelyanov 	if (!pde)
372107f1634SPavel Emelyanov 		goto err1;
373107f1634SPavel Emelyanov 
374d6444062SJoe Perches 	pde = proc_create("rt_cache", 0444,
37577020720SWang Chen 			  net->proc_net_stat, &rt_cpu_seq_fops);
376107f1634SPavel Emelyanov 	if (!pde)
377107f1634SPavel Emelyanov 		goto err2;
378107f1634SPavel Emelyanov 
379c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
3803f3942acSChristoph Hellwig 	pde = proc_create_single("rt_acct", 0, net->proc_net,
3813f3942acSChristoph Hellwig 			rt_acct_proc_show);
382107f1634SPavel Emelyanov 	if (!pde)
383107f1634SPavel Emelyanov 		goto err3;
384107f1634SPavel Emelyanov #endif
385107f1634SPavel Emelyanov 	return 0;
386107f1634SPavel Emelyanov 
387c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
388107f1634SPavel Emelyanov err3:
389107f1634SPavel Emelyanov 	remove_proc_entry("rt_cache", net->proc_net_stat);
390107f1634SPavel Emelyanov #endif
391107f1634SPavel Emelyanov err2:
392107f1634SPavel Emelyanov 	remove_proc_entry("rt_cache", net->proc_net);
393107f1634SPavel Emelyanov err1:
394107f1634SPavel Emelyanov 	return -ENOMEM;
395107f1634SPavel Emelyanov }
39673b38711SDenis V. Lunev 
39773b38711SDenis V. Lunev static void __net_exit ip_rt_do_proc_exit(struct net *net)
39873b38711SDenis V. Lunev {
39973b38711SDenis V. Lunev 	remove_proc_entry("rt_cache", net->proc_net_stat);
40073b38711SDenis V. Lunev 	remove_proc_entry("rt_cache", net->proc_net);
401c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
40273b38711SDenis V. Lunev 	remove_proc_entry("rt_acct", net->proc_net);
4030a931acfSAlexey Dobriyan #endif
40473b38711SDenis V. Lunev }
40573b38711SDenis V. Lunev 
40673b38711SDenis V. Lunev static struct pernet_operations ip_rt_proc_ops __net_initdata =  {
40773b38711SDenis V. Lunev 	.init = ip_rt_do_proc_init,
40873b38711SDenis V. Lunev 	.exit = ip_rt_do_proc_exit,
40973b38711SDenis V. Lunev };
41073b38711SDenis V. Lunev 
41173b38711SDenis V. Lunev static int __init ip_rt_proc_init(void)
41273b38711SDenis V. Lunev {
41373b38711SDenis V. Lunev 	return register_pernet_subsys(&ip_rt_proc_ops);
41473b38711SDenis V. Lunev }
41573b38711SDenis V. Lunev 
416107f1634SPavel Emelyanov #else
/* Stub for builds without CONFIG_PROC_FS: nothing to register. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
4211da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */
4221da177e4SLinus Torvalds 
4234331debcSEric Dumazet static inline bool rt_is_expired(const struct rtable *rth)
424e84f84f2SDenis V. Lunev {
425ca4c3fc2Sfan.du 	return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
426e84f84f2SDenis V. Lunev }
427e84f84f2SDenis V. Lunev 
/*
 * Bump the IPv4 route generation id of @net.  Routes whose rt_genid no
 * longer matches are then reported as expired by rt_is_expired().
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}
43298376387SEric Dumazet 
433f894cbf8SDavid S. Miller static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
434f894cbf8SDavid S. Miller 					   struct sk_buff *skb,
435f894cbf8SDavid S. Miller 					   const void *daddr)
4363769cffbSDavid Miller {
437d3aaeb38SDavid S. Miller 	struct net_device *dev = dst->dev;
438d3aaeb38SDavid S. Miller 	const __be32 *pkey = daddr;
43939232973SDavid S. Miller 	const struct rtable *rt;
4403769cffbSDavid Miller 	struct neighbour *n;
4413769cffbSDavid Miller 
44239232973SDavid S. Miller 	rt = (const struct rtable *) dst;
443a263b309SDavid S. Miller 	if (rt->rt_gateway)
44439232973SDavid S. Miller 		pkey = (const __be32 *) &rt->rt_gateway;
445f894cbf8SDavid S. Miller 	else if (skb)
446f894cbf8SDavid S. Miller 		pkey = &ip_hdr(skb)->daddr;
447d3aaeb38SDavid S. Miller 
44880703d26SDavid S. Miller 	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
449d3aaeb38SDavid S. Miller 	if (n)
450d3aaeb38SDavid S. Miller 		return n;
45132092ecfSDavid Miller 	return neigh_create(&arp_tbl, pkey, dev);
452d3aaeb38SDavid S. Miller }
453d3aaeb38SDavid S. Miller 
45463fca65dSJulian Anastasov static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
45563fca65dSJulian Anastasov {
45663fca65dSJulian Anastasov 	struct net_device *dev = dst->dev;
45763fca65dSJulian Anastasov 	const __be32 *pkey = daddr;
45863fca65dSJulian Anastasov 	const struct rtable *rt;
45963fca65dSJulian Anastasov 
46063fca65dSJulian Anastasov 	rt = (const struct rtable *)dst;
46163fca65dSJulian Anastasov 	if (rt->rt_gateway)
46263fca65dSJulian Anastasov 		pkey = (const __be32 *)&rt->rt_gateway;
46363fca65dSJulian Anastasov 	else if (!daddr ||
46463fca65dSJulian Anastasov 		 (rt->rt_flags &
46563fca65dSJulian Anastasov 		  (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
46663fca65dSJulian Anastasov 		return;
46763fca65dSJulian Anastasov 
46863fca65dSJulian Anastasov 	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
46963fca65dSJulian Anastasov }
47063fca65dSJulian Anastasov 
47104ca6973SEric Dumazet #define IP_IDENTS_SZ 2048u
47204ca6973SEric Dumazet 
473355b590cSEric Dumazet static atomic_t *ip_idents __read_mostly;
474355b590cSEric Dumazet static u32 *ip_tstamps __read_mostly;
47504ca6973SEric Dumazet 
47604ca6973SEric Dumazet /* In order to protect privacy, we add a perturbation to identifiers
47704ca6973SEric Dumazet  * if one generator is seldom used. This makes hard for an attacker
47804ca6973SEric Dumazet  * to infer how many packets were sent between two points in time.
47904ca6973SEric Dumazet  */
48004ca6973SEric Dumazet u32 ip_idents_reserve(u32 hash, int segs)
48104ca6973SEric Dumazet {
482355b590cSEric Dumazet 	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
483355b590cSEric Dumazet 	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
4846aa7de05SMark Rutland 	u32 old = READ_ONCE(*p_tstamp);
48504ca6973SEric Dumazet 	u32 now = (u32)jiffies;
486adb03115SEric Dumazet 	u32 new, delta = 0;
48704ca6973SEric Dumazet 
488355b590cSEric Dumazet 	if (old != now && cmpxchg(p_tstamp, old, now) == old)
48904ca6973SEric Dumazet 		delta = prandom_u32_max(now - old);
49004ca6973SEric Dumazet 
491adb03115SEric Dumazet 	/* Do not use atomic_add_return() as it makes UBSAN unhappy */
492adb03115SEric Dumazet 	do {
493adb03115SEric Dumazet 		old = (u32)atomic_read(p_id);
494adb03115SEric Dumazet 		new = old + delta + segs;
495adb03115SEric Dumazet 	} while (atomic_cmpxchg(p_id, old, new) != old);
496adb03115SEric Dumazet 
497adb03115SEric Dumazet 	return new - segs;
49804ca6973SEric Dumazet }
49904ca6973SEric Dumazet EXPORT_SYMBOL(ip_idents_reserve);
50073f156a6SEric Dumazet 
501b6a7719aSHannes Frederic Sowa void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
5021da177e4SLinus Torvalds {
50373f156a6SEric Dumazet 	static u32 ip_idents_hashrnd __read_mostly;
50473f156a6SEric Dumazet 	u32 hash, id;
5051da177e4SLinus Torvalds 
50673f156a6SEric Dumazet 	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
5071da177e4SLinus Torvalds 
50804ca6973SEric Dumazet 	hash = jhash_3words((__force u32)iph->daddr,
50904ca6973SEric Dumazet 			    (__force u32)iph->saddr,
510b6a7719aSHannes Frederic Sowa 			    iph->protocol ^ net_hash_mix(net),
51104ca6973SEric Dumazet 			    ip_idents_hashrnd);
51273f156a6SEric Dumazet 	id = ip_idents_reserve(hash, segs);
51373f156a6SEric Dumazet 	iph->id = htons(id);
5141da177e4SLinus Torvalds }
5154bc2f18bSEric Dumazet EXPORT_SYMBOL(__ip_select_ident);
5161da177e4SLinus Torvalds 
517e2d118a1SLorenzo Colitti static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
518e2d118a1SLorenzo Colitti 			     const struct sock *sk,
5194895c771SDavid S. Miller 			     const struct iphdr *iph,
5204895c771SDavid S. Miller 			     int oif, u8 tos,
5214895c771SDavid S. Miller 			     u8 prot, u32 mark, int flow_flags)
5224895c771SDavid S. Miller {
5234895c771SDavid S. Miller 	if (sk) {
5244895c771SDavid S. Miller 		const struct inet_sock *inet = inet_sk(sk);
5254895c771SDavid S. Miller 
5264895c771SDavid S. Miller 		oif = sk->sk_bound_dev_if;
5274895c771SDavid S. Miller 		mark = sk->sk_mark;
5284895c771SDavid S. Miller 		tos = RT_CONN_FLAGS(sk);
5294895c771SDavid S. Miller 		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
5304895c771SDavid S. Miller 	}
5314895c771SDavid S. Miller 	flowi4_init_output(fl4, oif, mark, tos,
5324895c771SDavid S. Miller 			   RT_SCOPE_UNIVERSE, prot,
5334895c771SDavid S. Miller 			   flow_flags,
534e2d118a1SLorenzo Colitti 			   iph->daddr, iph->saddr, 0, 0,
535e2d118a1SLorenzo Colitti 			   sock_net_uid(net, sk));
5364895c771SDavid S. Miller }
5374895c771SDavid S. Miller 
5385abf7f7eSEric Dumazet static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
5395abf7f7eSEric Dumazet 			       const struct sock *sk)
5404895c771SDavid S. Miller {
541d109e61bSLorenzo Colitti 	const struct net *net = dev_net(skb->dev);
5424895c771SDavid S. Miller 	const struct iphdr *iph = ip_hdr(skb);
5434895c771SDavid S. Miller 	int oif = skb->dev->ifindex;
5444895c771SDavid S. Miller 	u8 tos = RT_TOS(iph->tos);
5454895c771SDavid S. Miller 	u8 prot = iph->protocol;
5464895c771SDavid S. Miller 	u32 mark = skb->mark;
5474895c771SDavid S. Miller 
548d109e61bSLorenzo Colitti 	__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
5494895c771SDavid S. Miller }
5504895c771SDavid S. Miller 
5515abf7f7eSEric Dumazet static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
5524895c771SDavid S. Miller {
5534895c771SDavid S. Miller 	const struct inet_sock *inet = inet_sk(sk);
5545abf7f7eSEric Dumazet 	const struct ip_options_rcu *inet_opt;
5554895c771SDavid S. Miller 	__be32 daddr = inet->inet_daddr;
5564895c771SDavid S. Miller 
5574895c771SDavid S. Miller 	rcu_read_lock();
5584895c771SDavid S. Miller 	inet_opt = rcu_dereference(inet->inet_opt);
5594895c771SDavid S. Miller 	if (inet_opt && inet_opt->opt.srr)
5604895c771SDavid S. Miller 		daddr = inet_opt->opt.faddr;
5614895c771SDavid S. Miller 	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
5624895c771SDavid S. Miller 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
5634895c771SDavid S. Miller 			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
5644895c771SDavid S. Miller 			   inet_sk_flowi_flags(sk),
565e2d118a1SLorenzo Colitti 			   daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
5664895c771SDavid S. Miller 	rcu_read_unlock();
5674895c771SDavid S. Miller }
5684895c771SDavid S. Miller 
/*
 * Build the flow key @fl4 from @skb when one is supplied, otherwise from
 * the socket @sk alone.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb) {
		build_sk_flow_key(fl4, sk);
		return;
	}

	build_skb_flow_key(fl4, skb, sk);
}
5774895c771SDavid S. Miller 
/*
 * NOTE(review): presumably serialises updates of the nexthop exception
 * (fnhe) tables; the lock's users are outside this part of the file.
 */
static DEFINE_SPINLOCK(fnhe_lock);
5794895c771SDavid S. Miller 
5802ffae99dSTimo Teräs static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
5812ffae99dSTimo Teräs {
5822ffae99dSTimo Teräs 	struct rtable *rt;
5832ffae99dSTimo Teräs 
5842ffae99dSTimo Teräs 	rt = rcu_dereference(fnhe->fnhe_rth_input);
5852ffae99dSTimo Teräs 	if (rt) {
5862ffae99dSTimo Teräs 		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
58795c47f9cSWei Wang 		dst_dev_put(&rt->dst);
5880830106cSWei Wang 		dst_release(&rt->dst);
5892ffae99dSTimo Teräs 	}
5902ffae99dSTimo Teräs 	rt = rcu_dereference(fnhe->fnhe_rth_output);
5912ffae99dSTimo Teräs 	if (rt) {
5922ffae99dSTimo Teräs 		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
59395c47f9cSWei Wang 		dst_dev_put(&rt->dst);
5940830106cSWei Wang 		dst_release(&rt->dst);
5952ffae99dSTimo Teräs 	}
5962ffae99dSTimo Teräs }
5972ffae99dSTimo Teräs 
598aee06da6SJulian Anastasov static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
5994895c771SDavid S. Miller {
6004895c771SDavid S. Miller 	struct fib_nh_exception *fnhe, *oldest;
6014895c771SDavid S. Miller 
6024895c771SDavid S. Miller 	oldest = rcu_dereference(hash->chain);
6034895c771SDavid S. Miller 	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
6044895c771SDavid S. Miller 	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
6054895c771SDavid S. Miller 		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
6064895c771SDavid S. Miller 			oldest = fnhe;
6074895c771SDavid S. Miller 	}
6082ffae99dSTimo Teräs 	fnhe_flush_routes(oldest);
6094895c771SDavid S. Miller 	return oldest;
6104895c771SDavid S. Miller }
6114895c771SDavid S. Miller 
612d3a25c98SDavid S. Miller static inline u32 fnhe_hashfun(__be32 daddr)
613d3a25c98SDavid S. Miller {
614d546c621SEric Dumazet 	static u32 fnhe_hashrnd __read_mostly;
615d3a25c98SDavid S. Miller 	u32 hval;
616d3a25c98SDavid S. Miller 
617d546c621SEric Dumazet 	net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
618d546c621SEric Dumazet 	hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
619d546c621SEric Dumazet 	return hash_32(hval, FNHE_HASH_SHIFT);
620d3a25c98SDavid S. Miller }
621d3a25c98SDavid S. Miller 
622387aa65aSTimo Teräs static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
623387aa65aSTimo Teräs {
624387aa65aSTimo Teräs 	rt->rt_pmtu = fnhe->fnhe_pmtu;
625d52e5a7eSSabrina Dubroca 	rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
626387aa65aSTimo Teräs 	rt->dst.expires = fnhe->fnhe_expires;
627387aa65aSTimo Teräs 
628387aa65aSTimo Teräs 	if (fnhe->fnhe_gw) {
629387aa65aSTimo Teräs 		rt->rt_flags |= RTCF_REDIRECTED;
630387aa65aSTimo Teräs 		rt->rt_gateway = fnhe->fnhe_gw;
631387aa65aSTimo Teräs 		rt->rt_uses_gateway = 1;
632387aa65aSTimo Teräs 	}
633387aa65aSTimo Teräs }
634387aa65aSTimo Teräs 
635aee06da6SJulian Anastasov static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
636d52e5a7eSSabrina Dubroca 				  u32 pmtu, bool lock, unsigned long expires)
6374895c771SDavid S. Miller {
638aee06da6SJulian Anastasov 	struct fnhe_hash_bucket *hash;
6394895c771SDavid S. Miller 	struct fib_nh_exception *fnhe;
640387aa65aSTimo Teräs 	struct rtable *rt;
641cebe84c6SXin Long 	u32 genid, hval;
642387aa65aSTimo Teräs 	unsigned int i;
6434895c771SDavid S. Miller 	int depth;
644cebe84c6SXin Long 
645cebe84c6SXin Long 	genid = fnhe_genid(dev_net(nh->nh_dev));
646cebe84c6SXin Long 	hval = fnhe_hashfun(daddr);
6474895c771SDavid S. Miller 
648c5038a83SDavid S. Miller 	spin_lock_bh(&fnhe_lock);
649aee06da6SJulian Anastasov 
650caa41527SEric Dumazet 	hash = rcu_dereference(nh->nh_exceptions);
6514895c771SDavid S. Miller 	if (!hash) {
6526396bb22SKees Cook 		hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
6534895c771SDavid S. Miller 		if (!hash)
654aee06da6SJulian Anastasov 			goto out_unlock;
655caa41527SEric Dumazet 		rcu_assign_pointer(nh->nh_exceptions, hash);
6564895c771SDavid S. Miller 	}
6574895c771SDavid S. Miller 
6584895c771SDavid S. Miller 	hash += hval;
6594895c771SDavid S. Miller 
6604895c771SDavid S. Miller 	depth = 0;
6614895c771SDavid S. Miller 	for (fnhe = rcu_dereference(hash->chain); fnhe;
6624895c771SDavid S. Miller 	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
6634895c771SDavid S. Miller 		if (fnhe->fnhe_daddr == daddr)
664aee06da6SJulian Anastasov 			break;
6654895c771SDavid S. Miller 		depth++;
6664895c771SDavid S. Miller 	}
6674895c771SDavid S. Miller 
668aee06da6SJulian Anastasov 	if (fnhe) {
669cebe84c6SXin Long 		if (fnhe->fnhe_genid != genid)
670cebe84c6SXin Long 			fnhe->fnhe_genid = genid;
671aee06da6SJulian Anastasov 		if (gw)
672aee06da6SJulian Anastasov 			fnhe->fnhe_gw = gw;
673d52e5a7eSSabrina Dubroca 		if (pmtu) {
674aee06da6SJulian Anastasov 			fnhe->fnhe_pmtu = pmtu;
675d52e5a7eSSabrina Dubroca 			fnhe->fnhe_mtu_locked = lock;
676d52e5a7eSSabrina Dubroca 		}
677387aa65aSTimo Teräs 		fnhe->fnhe_expires = max(1UL, expires);
678387aa65aSTimo Teräs 		/* Update all cached dsts too */
6792ffae99dSTimo Teräs 		rt = rcu_dereference(fnhe->fnhe_rth_input);
6802ffae99dSTimo Teräs 		if (rt)
6812ffae99dSTimo Teräs 			fill_route_from_fnhe(rt, fnhe);
6822ffae99dSTimo Teräs 		rt = rcu_dereference(fnhe->fnhe_rth_output);
683387aa65aSTimo Teräs 		if (rt)
684387aa65aSTimo Teräs 			fill_route_from_fnhe(rt, fnhe);
685aee06da6SJulian Anastasov 	} else {
686aee06da6SJulian Anastasov 		if (depth > FNHE_RECLAIM_DEPTH)
687aee06da6SJulian Anastasov 			fnhe = fnhe_oldest(hash);
688aee06da6SJulian Anastasov 		else {
6894895c771SDavid S. Miller 			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
6904895c771SDavid S. Miller 			if (!fnhe)
691aee06da6SJulian Anastasov 				goto out_unlock;
6924895c771SDavid S. Miller 
6934895c771SDavid S. Miller 			fnhe->fnhe_next = hash->chain;
6944895c771SDavid S. Miller 			rcu_assign_pointer(hash->chain, fnhe);
695aee06da6SJulian Anastasov 		}
696cebe84c6SXin Long 		fnhe->fnhe_genid = genid;
6974895c771SDavid S. Miller 		fnhe->fnhe_daddr = daddr;
698aee06da6SJulian Anastasov 		fnhe->fnhe_gw = gw;
699aee06da6SJulian Anastasov 		fnhe->fnhe_pmtu = pmtu;
700d52e5a7eSSabrina Dubroca 		fnhe->fnhe_mtu_locked = lock;
70194720e3aSJulian Anastasov 		fnhe->fnhe_expires = max(1UL, expires);
702387aa65aSTimo Teräs 
703387aa65aSTimo Teräs 		/* Exception created; mark the cached routes for the nexthop
704387aa65aSTimo Teräs 		 * stale, so anyone caching it rechecks if this exception
705387aa65aSTimo Teräs 		 * applies to them.
706387aa65aSTimo Teräs 		 */
7072ffae99dSTimo Teräs 		rt = rcu_dereference(nh->nh_rth_input);
7082ffae99dSTimo Teräs 		if (rt)
7092ffae99dSTimo Teräs 			rt->dst.obsolete = DST_OBSOLETE_KILL;
7102ffae99dSTimo Teräs 
711387aa65aSTimo Teräs 		for_each_possible_cpu(i) {
712387aa65aSTimo Teräs 			struct rtable __rcu **prt;
713387aa65aSTimo Teräs 			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
714387aa65aSTimo Teräs 			rt = rcu_dereference(*prt);
715387aa65aSTimo Teräs 			if (rt)
716387aa65aSTimo Teräs 				rt->dst.obsolete = DST_OBSOLETE_KILL;
717387aa65aSTimo Teräs 		}
718aee06da6SJulian Anastasov 	}
719aee06da6SJulian Anastasov 
7204895c771SDavid S. Miller 	fnhe->fnhe_stamp = jiffies;
721aee06da6SJulian Anastasov 
722aee06da6SJulian Anastasov out_unlock:
723c5038a83SDavid S. Miller 	spin_unlock_bh(&fnhe_lock);
7244895c771SDavid S. Miller }
7254895c771SDavid S. Miller 
726ceb33206SDavid S. Miller static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
727ceb33206SDavid S. Miller 			     bool kill_route)
7281da177e4SLinus Torvalds {
729e47a185bSDavid S. Miller 	__be32 new_gw = icmp_hdr(skb)->un.gateway;
73094206125SDavid S. Miller 	__be32 old_gw = ip_hdr(skb)->saddr;
731e47a185bSDavid S. Miller 	struct net_device *dev = skb->dev;
732e47a185bSDavid S. Miller 	struct in_device *in_dev;
7334895c771SDavid S. Miller 	struct fib_result res;
734e47a185bSDavid S. Miller 	struct neighbour *n;
735317805b8SDenis V. Lunev 	struct net *net;
7361da177e4SLinus Torvalds 
73794206125SDavid S. Miller 	switch (icmp_hdr(skb)->code & 7) {
73894206125SDavid S. Miller 	case ICMP_REDIR_NET:
73994206125SDavid S. Miller 	case ICMP_REDIR_NETTOS:
74094206125SDavid S. Miller 	case ICMP_REDIR_HOST:
74194206125SDavid S. Miller 	case ICMP_REDIR_HOSTTOS:
74294206125SDavid S. Miller 		break;
74394206125SDavid S. Miller 
74494206125SDavid S. Miller 	default:
74594206125SDavid S. Miller 		return;
74694206125SDavid S. Miller 	}
74794206125SDavid S. Miller 
748e47a185bSDavid S. Miller 	if (rt->rt_gateway != old_gw)
749e47a185bSDavid S. Miller 		return;
750e47a185bSDavid S. Miller 
751e47a185bSDavid S. Miller 	in_dev = __in_dev_get_rcu(dev);
752e47a185bSDavid S. Miller 	if (!in_dev)
753e47a185bSDavid S. Miller 		return;
754e47a185bSDavid S. Miller 
755c346dca1SYOSHIFUJI Hideaki 	net = dev_net(dev);
7569d4fb27dSJoe Perches 	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
7579d4fb27dSJoe Perches 	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
7589d4fb27dSJoe Perches 	    ipv4_is_zeronet(new_gw))
7591da177e4SLinus Torvalds 		goto reject_redirect;
7601da177e4SLinus Torvalds 
7611da177e4SLinus Torvalds 	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
7621da177e4SLinus Torvalds 		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
7631da177e4SLinus Torvalds 			goto reject_redirect;
7641da177e4SLinus Torvalds 		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
7651da177e4SLinus Torvalds 			goto reject_redirect;
7661da177e4SLinus Torvalds 	} else {
767317805b8SDenis V. Lunev 		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
7681da177e4SLinus Torvalds 			goto reject_redirect;
7691da177e4SLinus Torvalds 	}
7701da177e4SLinus Torvalds 
771969447f2SStephen Suryaputra Lin 	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
772969447f2SStephen Suryaputra Lin 	if (!n)
773969447f2SStephen Suryaputra Lin 		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
7742c1a4311SWANG Cong 	if (!IS_ERR(n)) {
775e47a185bSDavid S. Miller 		if (!(n->nud_state & NUD_VALID)) {
776e47a185bSDavid S. Miller 			neigh_event_send(n, NULL);
777e47a185bSDavid S. Miller 		} else {
7780eeb075fSAndy Gospodarek 			if (fib_lookup(net, fl4, &res, 0) == 0) {
7794895c771SDavid S. Miller 				struct fib_nh *nh = &FIB_RES_NH(res);
7804895c771SDavid S. Miller 
781aee06da6SJulian Anastasov 				update_or_create_fnhe(nh, fl4->daddr, new_gw,
782d52e5a7eSSabrina Dubroca 						0, false,
783d52e5a7eSSabrina Dubroca 						jiffies + ip_rt_gc_timeout);
7844895c771SDavid S. Miller 			}
785ceb33206SDavid S. Miller 			if (kill_route)
786ceb33206SDavid S. Miller 				rt->dst.obsolete = DST_OBSOLETE_KILL;
787e47a185bSDavid S. Miller 			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
788e47a185bSDavid S. Miller 		}
789e47a185bSDavid S. Miller 		neigh_release(n);
790e47a185bSDavid S. Miller 	}
791e47a185bSDavid S. Miller 	return;
792e47a185bSDavid S. Miller 
793e47a185bSDavid S. Miller reject_redirect:
794e47a185bSDavid S. Miller #ifdef CONFIG_IP_ROUTE_VERBOSE
79599ee038dSDavid S. Miller 	if (IN_DEV_LOG_MARTIANS(in_dev)) {
79699ee038dSDavid S. Miller 		const struct iphdr *iph = (const struct iphdr *) skb->data;
79799ee038dSDavid S. Miller 		__be32 daddr = iph->daddr;
79899ee038dSDavid S. Miller 		__be32 saddr = iph->saddr;
79999ee038dSDavid S. Miller 
800e47a185bSDavid S. Miller 		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
801e47a185bSDavid S. Miller 				     "  Advised path = %pI4 -> %pI4\n",
802e47a185bSDavid S. Miller 				     &old_gw, dev->name, &new_gw,
803e47a185bSDavid S. Miller 				     &saddr, &daddr);
80499ee038dSDavid S. Miller 	}
805e47a185bSDavid S. Miller #endif
806e47a185bSDavid S. Miller 	;
807e47a185bSDavid S. Miller }
808e47a185bSDavid S. Miller 
8094895c771SDavid S. Miller static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
8104895c771SDavid S. Miller {
8114895c771SDavid S. Miller 	struct rtable *rt;
8124895c771SDavid S. Miller 	struct flowi4 fl4;
813f96ef988SMichal Kubecek 	const struct iphdr *iph = (const struct iphdr *) skb->data;
8147d995694SLorenzo Colitti 	struct net *net = dev_net(skb->dev);
815f96ef988SMichal Kubecek 	int oif = skb->dev->ifindex;
816f96ef988SMichal Kubecek 	u8 tos = RT_TOS(iph->tos);
817f96ef988SMichal Kubecek 	u8 prot = iph->protocol;
818f96ef988SMichal Kubecek 	u32 mark = skb->mark;
8194895c771SDavid S. Miller 
8204895c771SDavid S. Miller 	rt = (struct rtable *) dst;
8214895c771SDavid S. Miller 
8227d995694SLorenzo Colitti 	__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
823ceb33206SDavid S. Miller 	__ip_do_redirect(rt, skb, &fl4, true);
8244895c771SDavid S. Miller }
8254895c771SDavid S. Miller 
8261da177e4SLinus Torvalds static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
8271da177e4SLinus Torvalds {
8281da177e4SLinus Torvalds 	struct rtable *rt = (struct rtable *)dst;
8291da177e4SLinus Torvalds 	struct dst_entry *ret = dst;
8301da177e4SLinus Torvalds 
8311da177e4SLinus Torvalds 	if (rt) {
832d11a4dc1STimo Teräs 		if (dst->obsolete > 0) {
8331da177e4SLinus Torvalds 			ip_rt_put(rt);
8341da177e4SLinus Torvalds 			ret = NULL;
8355943634fSDavid S. Miller 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
8365943634fSDavid S. Miller 			   rt->dst.expires) {
83789aef892SDavid S. Miller 			ip_rt_put(rt);
8381da177e4SLinus Torvalds 			ret = NULL;
8391da177e4SLinus Torvalds 		}
8401da177e4SLinus Torvalds 	}
8411da177e4SLinus Torvalds 	return ret;
8421da177e4SLinus Torvalds }
8431da177e4SLinus Torvalds 
8441da177e4SLinus Torvalds /*
8451da177e4SLinus Torvalds  * Algorithm:
8461da177e4SLinus Torvalds  *	1. The first ip_rt_redirect_number redirects are sent
8471da177e4SLinus Torvalds  *	   with exponential backoff, then we stop sending them at all,
8481da177e4SLinus Torvalds  *	   assuming that the host ignores our redirects.
8491da177e4SLinus Torvalds  *	2. If we did not see packets requiring redirects
8501da177e4SLinus Torvalds  *	   during ip_rt_redirect_silence, we assume that the host
8511da177e4SLinus Torvalds  *	   forgot redirected route and start to send redirects again.
8521da177e4SLinus Torvalds  *
8531da177e4SLinus Torvalds  * This algorithm is much cheaper and more intelligent than dumb load limiting
8541da177e4SLinus Torvalds  * in icmp.c.
8551da177e4SLinus Torvalds  *
8561da177e4SLinus Torvalds  * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
8571da177e4SLinus Torvalds  * and "frag. need" (breaks PMTU discovery) in icmp.c.
8581da177e4SLinus Torvalds  */
8591da177e4SLinus Torvalds 
8601da177e4SLinus Torvalds void ip_rt_send_redirect(struct sk_buff *skb)
8611da177e4SLinus Torvalds {
862511c3f92SEric Dumazet 	struct rtable *rt = skb_rtable(skb);
86330038fc6SEric Dumazet 	struct in_device *in_dev;
86492d86829SDavid S. Miller 	struct inet_peer *peer;
8651d861aa4SDavid S. Miller 	struct net *net;
86630038fc6SEric Dumazet 	int log_martians;
867192132b9SDavid Ahern 	int vif;
8681da177e4SLinus Torvalds 
86930038fc6SEric Dumazet 	rcu_read_lock();
870d8d1f30bSChangli Gao 	in_dev = __in_dev_get_rcu(rt->dst.dev);
87130038fc6SEric Dumazet 	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
87230038fc6SEric Dumazet 		rcu_read_unlock();
8731da177e4SLinus Torvalds 		return;
87430038fc6SEric Dumazet 	}
87530038fc6SEric Dumazet 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
876385add90SDavid Ahern 	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
87730038fc6SEric Dumazet 	rcu_read_unlock();
8781da177e4SLinus Torvalds 
8791d861aa4SDavid S. Miller 	net = dev_net(rt->dst.dev);
880192132b9SDavid Ahern 	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
88192d86829SDavid S. Miller 	if (!peer) {
882e81da0e1SJulian Anastasov 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
883e81da0e1SJulian Anastasov 			  rt_nexthop(rt, ip_hdr(skb)->daddr));
88492d86829SDavid S. Miller 		return;
88592d86829SDavid S. Miller 	}
88692d86829SDavid S. Miller 
8871da177e4SLinus Torvalds 	/* No redirected packets during ip_rt_redirect_silence;
8881da177e4SLinus Torvalds 	 * reset the algorithm.
8891da177e4SLinus Torvalds 	 */
89092d86829SDavid S. Miller 	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
89192d86829SDavid S. Miller 		peer->rate_tokens = 0;
8921da177e4SLinus Torvalds 
8931da177e4SLinus Torvalds 	/* Too many ignored redirects; do not send anything
894d8d1f30bSChangli Gao 	 * set dst.rate_last to the last seen redirected packet.
8951da177e4SLinus Torvalds 	 */
89692d86829SDavid S. Miller 	if (peer->rate_tokens >= ip_rt_redirect_number) {
89792d86829SDavid S. Miller 		peer->rate_last = jiffies;
8981d861aa4SDavid S. Miller 		goto out_put_peer;
8991da177e4SLinus Torvalds 	}
9001da177e4SLinus Torvalds 
9011da177e4SLinus Torvalds 	/* Check for load limit; set rate_last to the latest sent
9021da177e4SLinus Torvalds 	 * redirect.
9031da177e4SLinus Torvalds 	 */
90492d86829SDavid S. Miller 	if (peer->rate_tokens == 0 ||
90514fb8a76SLi Yewang 	    time_after(jiffies,
90692d86829SDavid S. Miller 		       (peer->rate_last +
90792d86829SDavid S. Miller 			(ip_rt_redirect_load << peer->rate_tokens)))) {
908e81da0e1SJulian Anastasov 		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
909e81da0e1SJulian Anastasov 
910e81da0e1SJulian Anastasov 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
91192d86829SDavid S. Miller 		peer->rate_last = jiffies;
91292d86829SDavid S. Miller 		++peer->rate_tokens;
9131da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE
91430038fc6SEric Dumazet 		if (log_martians &&
915e87cc472SJoe Perches 		    peer->rate_tokens == ip_rt_redirect_number)
916e87cc472SJoe Perches 			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
91792101b3bSDavid S. Miller 					     &ip_hdr(skb)->saddr, inet_iif(skb),
918e81da0e1SJulian Anastasov 					     &ip_hdr(skb)->daddr, &gw);
9191da177e4SLinus Torvalds #endif
9201da177e4SLinus Torvalds 	}
9211d861aa4SDavid S. Miller out_put_peer:
9221d861aa4SDavid S. Miller 	inet_putpeer(peer);
9231da177e4SLinus Torvalds }
9241da177e4SLinus Torvalds 
9251da177e4SLinus Torvalds static int ip_error(struct sk_buff *skb)
9261da177e4SLinus Torvalds {
927511c3f92SEric Dumazet 	struct rtable *rt = skb_rtable(skb);
928e2c0dc1fSStephen Suryaputra 	struct net_device *dev = skb->dev;
929e2c0dc1fSStephen Suryaputra 	struct in_device *in_dev;
93092d86829SDavid S. Miller 	struct inet_peer *peer;
9311da177e4SLinus Torvalds 	unsigned long now;
932251da413SDavid S. Miller 	struct net *net;
93392d86829SDavid S. Miller 	bool send;
9341da177e4SLinus Torvalds 	int code;
9351da177e4SLinus Torvalds 
936e2c0dc1fSStephen Suryaputra 	if (netif_is_l3_master(skb->dev)) {
937e2c0dc1fSStephen Suryaputra 		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
938e2c0dc1fSStephen Suryaputra 		if (!dev)
939e2c0dc1fSStephen Suryaputra 			goto out;
940e2c0dc1fSStephen Suryaputra 	}
941e2c0dc1fSStephen Suryaputra 
942e2c0dc1fSStephen Suryaputra 	in_dev = __in_dev_get_rcu(dev);
943e2c0dc1fSStephen Suryaputra 
944381c759dSEric W. Biederman 	/* IP on this device is disabled. */
945381c759dSEric W. Biederman 	if (!in_dev)
946381c759dSEric W. Biederman 		goto out;
947381c759dSEric W. Biederman 
948251da413SDavid S. Miller 	net = dev_net(rt->dst.dev);
949251da413SDavid S. Miller 	if (!IN_DEV_FORWARD(in_dev)) {
950251da413SDavid S. Miller 		switch (rt->dst.error) {
951251da413SDavid S. Miller 		case EHOSTUNREACH:
952b45386efSEric Dumazet 			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
953251da413SDavid S. Miller 			break;
954251da413SDavid S. Miller 
955251da413SDavid S. Miller 		case ENETUNREACH:
956b45386efSEric Dumazet 			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
957251da413SDavid S. Miller 			break;
958251da413SDavid S. Miller 		}
959251da413SDavid S. Miller 		goto out;
960251da413SDavid S. Miller 	}
961251da413SDavid S. Miller 
962d8d1f30bSChangli Gao 	switch (rt->dst.error) {
9631da177e4SLinus Torvalds 	case EINVAL:
9641da177e4SLinus Torvalds 	default:
9651da177e4SLinus Torvalds 		goto out;
9661da177e4SLinus Torvalds 	case EHOSTUNREACH:
9671da177e4SLinus Torvalds 		code = ICMP_HOST_UNREACH;
9681da177e4SLinus Torvalds 		break;
9691da177e4SLinus Torvalds 	case ENETUNREACH:
9701da177e4SLinus Torvalds 		code = ICMP_NET_UNREACH;
971b45386efSEric Dumazet 		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
9721da177e4SLinus Torvalds 		break;
9731da177e4SLinus Torvalds 	case EACCES:
9741da177e4SLinus Torvalds 		code = ICMP_PKT_FILTERED;
9751da177e4SLinus Torvalds 		break;
9761da177e4SLinus Torvalds 	}
9771da177e4SLinus Torvalds 
978192132b9SDavid Ahern 	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
979385add90SDavid Ahern 			       l3mdev_master_ifindex(skb->dev), 1);
98092d86829SDavid S. Miller 
98192d86829SDavid S. Miller 	send = true;
98292d86829SDavid S. Miller 	if (peer) {
9831da177e4SLinus Torvalds 		now = jiffies;
98492d86829SDavid S. Miller 		peer->rate_tokens += now - peer->rate_last;
98592d86829SDavid S. Miller 		if (peer->rate_tokens > ip_rt_error_burst)
98692d86829SDavid S. Miller 			peer->rate_tokens = ip_rt_error_burst;
98792d86829SDavid S. Miller 		peer->rate_last = now;
98892d86829SDavid S. Miller 		if (peer->rate_tokens >= ip_rt_error_cost)
98992d86829SDavid S. Miller 			peer->rate_tokens -= ip_rt_error_cost;
99092d86829SDavid S. Miller 		else
99192d86829SDavid S. Miller 			send = false;
9921d861aa4SDavid S. Miller 		inet_putpeer(peer);
9931da177e4SLinus Torvalds 	}
99492d86829SDavid S. Miller 	if (send)
99592d86829SDavid S. Miller 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
9961da177e4SLinus Torvalds 
9971da177e4SLinus Torvalds out:	kfree_skb(skb);
9981da177e4SLinus Torvalds 	return 0;
9991da177e4SLinus Torvalds }
10001da177e4SLinus Torvalds 
1001d851c12bSSteffen Klassert static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
10021da177e4SLinus Torvalds {
1003d851c12bSSteffen Klassert 	struct dst_entry *dst = &rt->dst;
10044895c771SDavid S. Miller 	struct fib_result res;
1005d52e5a7eSSabrina Dubroca 	bool lock = false;
10062c8cec5cSDavid S. Miller 
1007d52e5a7eSSabrina Dubroca 	if (ip_mtu_locked(dst))
1008fa1e492aSSteffen Klassert 		return;
1009fa1e492aSSteffen Klassert 
1010cb6ccf09SHerbert Xu 	if (ipv4_mtu(dst) < mtu)
10113cdaa5beSLi Wei 		return;
10123cdaa5beSLi Wei 
1013d52e5a7eSSabrina Dubroca 	if (mtu < ip_rt_min_pmtu) {
1014d52e5a7eSSabrina Dubroca 		lock = true;
10151da177e4SLinus Torvalds 		mtu = ip_rt_min_pmtu;
1016d52e5a7eSSabrina Dubroca 	}
101746af3180SHiroaki SHIMODA 
1018f016229eSTimo Teräs 	if (rt->rt_pmtu == mtu &&
1019f016229eSTimo Teräs 	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
1020f016229eSTimo Teräs 		return;
1021f016229eSTimo Teräs 
1022c5ae7d41SEric Dumazet 	rcu_read_lock();
10230eeb075fSAndy Gospodarek 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
10244895c771SDavid S. Miller 		struct fib_nh *nh = &FIB_RES_NH(res);
10254895c771SDavid S. Miller 
1026d52e5a7eSSabrina Dubroca 		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
1027aee06da6SJulian Anastasov 				      jiffies + ip_rt_mtu_expires);
10284895c771SDavid S. Miller 	}
1029c5ae7d41SEric Dumazet 	rcu_read_unlock();
10301da177e4SLinus Torvalds }
10311da177e4SLinus Torvalds 
10324895c771SDavid S. Miller static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
10334895c771SDavid S. Miller 			      struct sk_buff *skb, u32 mtu)
10344895c771SDavid S. Miller {
10354895c771SDavid S. Miller 	struct rtable *rt = (struct rtable *) dst;
10364895c771SDavid S. Miller 	struct flowi4 fl4;
10374895c771SDavid S. Miller 
10384895c771SDavid S. Miller 	ip_rt_build_flow_key(&fl4, sk, skb);
1039d851c12bSSteffen Klassert 	__ip_rt_update_pmtu(rt, &fl4, mtu);
10404895c771SDavid S. Miller }
10414895c771SDavid S. Miller 
104236393395SDavid S. Miller void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
104336393395SDavid S. Miller 		      int oif, u32 mark, u8 protocol, int flow_flags)
104436393395SDavid S. Miller {
104536393395SDavid S. Miller 	const struct iphdr *iph = (const struct iphdr *) skb->data;
104636393395SDavid S. Miller 	struct flowi4 fl4;
104736393395SDavid S. Miller 	struct rtable *rt;
104836393395SDavid S. Miller 
10491b3c61dcSLorenzo Colitti 	if (!mark)
10501b3c61dcSLorenzo Colitti 		mark = IP4_REPLY_MARK(net, skb->mark);
10511b3c61dcSLorenzo Colitti 
1052e2d118a1SLorenzo Colitti 	__build_flow_key(net, &fl4, NULL, iph, oif,
10534895c771SDavid S. Miller 			 RT_TOS(iph->tos), protocol, mark, flow_flags);
105436393395SDavid S. Miller 	rt = __ip_route_output_key(net, &fl4);
105536393395SDavid S. Miller 	if (!IS_ERR(rt)) {
10564895c771SDavid S. Miller 		__ip_rt_update_pmtu(rt, &fl4, mtu);
105736393395SDavid S. Miller 		ip_rt_put(rt);
105836393395SDavid S. Miller 	}
105936393395SDavid S. Miller }
106036393395SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
106136393395SDavid S. Miller 
10629cb3a50cSSteffen Klassert static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
106336393395SDavid S. Miller {
10644895c771SDavid S. Miller 	const struct iphdr *iph = (const struct iphdr *) skb->data;
10654895c771SDavid S. Miller 	struct flowi4 fl4;
10664895c771SDavid S. Miller 	struct rtable *rt;
106736393395SDavid S. Miller 
1068e2d118a1SLorenzo Colitti 	__build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
10691b3c61dcSLorenzo Colitti 
10701b3c61dcSLorenzo Colitti 	if (!fl4.flowi4_mark)
10711b3c61dcSLorenzo Colitti 		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
10721b3c61dcSLorenzo Colitti 
10734895c771SDavid S. Miller 	rt = __ip_route_output_key(sock_net(sk), &fl4);
10744895c771SDavid S. Miller 	if (!IS_ERR(rt)) {
10754895c771SDavid S. Miller 		__ip_rt_update_pmtu(rt, &fl4, mtu);
10764895c771SDavid S. Miller 		ip_rt_put(rt);
10774895c771SDavid S. Miller 	}
107836393395SDavid S. Miller }
10799cb3a50cSSteffen Klassert 
10809cb3a50cSSteffen Klassert void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
10819cb3a50cSSteffen Klassert {
10829cb3a50cSSteffen Klassert 	const struct iphdr *iph = (const struct iphdr *) skb->data;
10839cb3a50cSSteffen Klassert 	struct flowi4 fl4;
10849cb3a50cSSteffen Klassert 	struct rtable *rt;
10857f502361SEric Dumazet 	struct dst_entry *odst = NULL;
1086b44108dbSSteffen Klassert 	bool new = false;
1087e2d118a1SLorenzo Colitti 	struct net *net = sock_net(sk);
10889cb3a50cSSteffen Klassert 
10899cb3a50cSSteffen Klassert 	bh_lock_sock(sk);
1090482fc609SHannes Frederic Sowa 
1091482fc609SHannes Frederic Sowa 	if (!ip_sk_accept_pmtu(sk))
1092482fc609SHannes Frederic Sowa 		goto out;
1093482fc609SHannes Frederic Sowa 
10947f502361SEric Dumazet 	odst = sk_dst_get(sk);
10959cb3a50cSSteffen Klassert 
10967f502361SEric Dumazet 	if (sock_owned_by_user(sk) || !odst) {
10979cb3a50cSSteffen Klassert 		__ipv4_sk_update_pmtu(skb, sk, mtu);
10989cb3a50cSSteffen Klassert 		goto out;
10999cb3a50cSSteffen Klassert 	}
11009cb3a50cSSteffen Klassert 
1101e2d118a1SLorenzo Colitti 	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
11029cb3a50cSSteffen Klassert 
11037f502361SEric Dumazet 	rt = (struct rtable *)odst;
110451456b29SIan Morris 	if (odst->obsolete && !odst->ops->check(odst, 0)) {
11059cb3a50cSSteffen Klassert 		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
11069cb3a50cSSteffen Klassert 		if (IS_ERR(rt))
11079cb3a50cSSteffen Klassert 			goto out;
1108b44108dbSSteffen Klassert 
1109b44108dbSSteffen Klassert 		new = true;
11109cb3a50cSSteffen Klassert 	}
11119cb3a50cSSteffen Klassert 
11120f6c480fSDavid Miller 	__ip_rt_update_pmtu((struct rtable *) xfrm_dst_path(&rt->dst), &fl4, mtu);
11139cb3a50cSSteffen Klassert 
11147f502361SEric Dumazet 	if (!dst_check(&rt->dst, 0)) {
1115b44108dbSSteffen Klassert 		if (new)
1116b44108dbSSteffen Klassert 			dst_release(&rt->dst);
1117b44108dbSSteffen Klassert 
11189cb3a50cSSteffen Klassert 		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
11199cb3a50cSSteffen Klassert 		if (IS_ERR(rt))
11209cb3a50cSSteffen Klassert 			goto out;
11219cb3a50cSSteffen Klassert 
1122b44108dbSSteffen Klassert 		new = true;
11239cb3a50cSSteffen Klassert 	}
11249cb3a50cSSteffen Klassert 
1125b44108dbSSteffen Klassert 	if (new)
11267f502361SEric Dumazet 		sk_dst_set(sk, &rt->dst);
11279cb3a50cSSteffen Klassert 
11289cb3a50cSSteffen Klassert out:
11299cb3a50cSSteffen Klassert 	bh_unlock_sock(sk);
11307f502361SEric Dumazet 	dst_release(odst);
11319cb3a50cSSteffen Klassert }
113236393395SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
1133f39925dbSDavid S. Miller 
1134b42597e2SDavid S. Miller void ipv4_redirect(struct sk_buff *skb, struct net *net,
1135b42597e2SDavid S. Miller 		   int oif, u32 mark, u8 protocol, int flow_flags)
1136b42597e2SDavid S. Miller {
1137b42597e2SDavid S. Miller 	const struct iphdr *iph = (const struct iphdr *) skb->data;
1138b42597e2SDavid S. Miller 	struct flowi4 fl4;
1139b42597e2SDavid S. Miller 	struct rtable *rt;
1140b42597e2SDavid S. Miller 
1141e2d118a1SLorenzo Colitti 	__build_flow_key(net, &fl4, NULL, iph, oif,
11424895c771SDavid S. Miller 			 RT_TOS(iph->tos), protocol, mark, flow_flags);
1143b42597e2SDavid S. Miller 	rt = __ip_route_output_key(net, &fl4);
1144b42597e2SDavid S. Miller 	if (!IS_ERR(rt)) {
1145ceb33206SDavid S. Miller 		__ip_do_redirect(rt, skb, &fl4, false);
1146b42597e2SDavid S. Miller 		ip_rt_put(rt);
1147b42597e2SDavid S. Miller 	}
1148b42597e2SDavid S. Miller }
1149b42597e2SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_redirect);
1150b42597e2SDavid S. Miller 
1151b42597e2SDavid S. Miller void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1152b42597e2SDavid S. Miller {
11534895c771SDavid S. Miller 	const struct iphdr *iph = (const struct iphdr *) skb->data;
11544895c771SDavid S. Miller 	struct flowi4 fl4;
11554895c771SDavid S. Miller 	struct rtable *rt;
1156e2d118a1SLorenzo Colitti 	struct net *net = sock_net(sk);
1157b42597e2SDavid S. Miller 
1158e2d118a1SLorenzo Colitti 	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1159e2d118a1SLorenzo Colitti 	rt = __ip_route_output_key(net, &fl4);
11604895c771SDavid S. Miller 	if (!IS_ERR(rt)) {
1161ceb33206SDavid S. Miller 		__ip_do_redirect(rt, skb, &fl4, false);
11624895c771SDavid S. Miller 		ip_rt_put(rt);
11634895c771SDavid S. Miller 	}
1164b42597e2SDavid S. Miller }
1165b42597e2SDavid S. Miller EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1166b42597e2SDavid S. Miller 
1167efbc368dSDavid S. Miller static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1168efbc368dSDavid S. Miller {
1169efbc368dSDavid S. Miller 	struct rtable *rt = (struct rtable *) dst;
1170efbc368dSDavid S. Miller 
1171ceb33206SDavid S. Miller 	/* All IPV4 dsts are created with ->obsolete set to the value
1172ceb33206SDavid S. Miller 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1173ceb33206SDavid S. Miller 	 * into this function always.
1174ceb33206SDavid S. Miller 	 *
1175387aa65aSTimo Teräs 	 * When a PMTU/redirect information update invalidates a route,
1176387aa65aSTimo Teräs 	 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1177387aa65aSTimo Teräs 	 * DST_OBSOLETE_DEAD by dst_free().
1178ceb33206SDavid S. Miller 	 */
1179387aa65aSTimo Teräs 	if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
1180efbc368dSDavid S. Miller 		return NULL;
1181d11a4dc1STimo Teräs 	return dst;
11821da177e4SLinus Torvalds }
11831da177e4SLinus Torvalds 
11841da177e4SLinus Torvalds static void ipv4_link_failure(struct sk_buff *skb)
11851da177e4SLinus Torvalds {
11861da177e4SLinus Torvalds 	struct rtable *rt;
11871da177e4SLinus Torvalds 
11881da177e4SLinus Torvalds 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
11891da177e4SLinus Torvalds 
1190511c3f92SEric Dumazet 	rt = skb_rtable(skb);
11915943634fSDavid S. Miller 	if (rt)
11925943634fSDavid S. Miller 		dst_set_expires(&rt->dst, 0);
11932c8cec5cSDavid S. Miller }
11941da177e4SLinus Torvalds 
1195ede2059dSEric W. Biederman static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
11961da177e4SLinus Torvalds {
119791df42beSJoe Perches 	pr_debug("%s: %pI4 -> %pI4, %s\n",
119891df42beSJoe Perches 		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
11991da177e4SLinus Torvalds 		 skb->dev ? skb->dev->name : "?");
12001da177e4SLinus Torvalds 	kfree_skb(skb);
1201c378a9c0SDave Jones 	WARN_ON(1);
12021da177e4SLinus Torvalds 	return 0;
12031da177e4SLinus Torvalds }
12041da177e4SLinus Torvalds 
12051da177e4SLinus Torvalds /*
12061da177e4SLinus Torvalds    We do not cache source address of outgoing interface,
12071da177e4SLinus Torvalds    because it is used only by IP RR, TS and SRR options,
12081da177e4SLinus Torvalds    so that it out of fast path.
12091da177e4SLinus Torvalds 
12101da177e4SLinus Torvalds    BTW remember: "addr" is allowed to be not aligned
12111da177e4SLinus Torvalds    in IP options!
12121da177e4SLinus Torvalds  */
12131da177e4SLinus Torvalds 
12148e36360aSDavid S. Miller void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
12151da177e4SLinus Torvalds {
1216a61ced5dSAl Viro 	__be32 src;
12171da177e4SLinus Torvalds 
1218c7537967SDavid S. Miller 	if (rt_is_output_route(rt))
1219c5be24ffSDavid S. Miller 		src = ip_hdr(skb)->saddr;
1220ebc0ffaeSEric Dumazet 	else {
12218e36360aSDavid S. Miller 		struct fib_result res;
12228e36360aSDavid S. Miller 		struct flowi4 fl4;
12238e36360aSDavid S. Miller 		struct iphdr *iph;
12248e36360aSDavid S. Miller 
12258e36360aSDavid S. Miller 		iph = ip_hdr(skb);
12268e36360aSDavid S. Miller 
12278e36360aSDavid S. Miller 		memset(&fl4, 0, sizeof(fl4));
12288e36360aSDavid S. Miller 		fl4.daddr = iph->daddr;
12298e36360aSDavid S. Miller 		fl4.saddr = iph->saddr;
1230b0fe4a31SJulian Anastasov 		fl4.flowi4_tos = RT_TOS(iph->tos);
12318e36360aSDavid S. Miller 		fl4.flowi4_oif = rt->dst.dev->ifindex;
12328e36360aSDavid S. Miller 		fl4.flowi4_iif = skb->dev->ifindex;
12338e36360aSDavid S. Miller 		fl4.flowi4_mark = skb->mark;
12345e2b61f7SDavid S. Miller 
1235ebc0ffaeSEric Dumazet 		rcu_read_lock();
12360eeb075fSAndy Gospodarek 		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
1237436c3b66SDavid S. Miller 			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
1238ebc0ffaeSEric Dumazet 		else
1239f8126f1dSDavid S. Miller 			src = inet_select_addr(rt->dst.dev,
1240f8126f1dSDavid S. Miller 					       rt_nexthop(rt, iph->daddr),
12411da177e4SLinus Torvalds 					       RT_SCOPE_UNIVERSE);
1242ebc0ffaeSEric Dumazet 		rcu_read_unlock();
1243ebc0ffaeSEric Dumazet 	}
12441da177e4SLinus Torvalds 	memcpy(addr, &src, 4);
12451da177e4SLinus Torvalds }
12461da177e4SLinus Torvalds 
#ifdef CONFIG_IP_ROUTE_CLASSID
/*
 * Merge @tag into rt->dst.tclassid one 16-bit half at a time: a half of
 * the class id is taken from @tag only when that half is currently zero.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	const u32 low_half = 0xFFFF;
	const u32 high_half = 0xFFFF0000;

	if ((rt->dst.tclassid & low_half) == 0)
		rt->dst.tclassid |= tag & low_half;

	if ((rt->dst.tclassid & high_half) == 0)
		rt->dst.tclassid |= tag & high_half;
}
#endif
12561da177e4SLinus Torvalds 
12570dbaee3bSDavid S. Miller static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
12580dbaee3bSDavid S. Miller {
12597ed14d97SGao Feng 	unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
1260164a5e7aSEric Dumazet 	unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
12610dbaee3bSDavid S. Miller 				    ip_rt_min_advmss);
12627ed14d97SGao Feng 
12637ed14d97SGao Feng 	return min(advmss, IPV4_MAX_PMTU - header_size);
12640dbaee3bSDavid S. Miller }
12650dbaee3bSDavid S. Miller 
1266ebb762f2SSteffen Klassert static unsigned int ipv4_mtu(const struct dst_entry *dst)
1267d33e4553SDavid S. Miller {
1268261663b0SSteffen Klassert 	const struct rtable *rt = (const struct rtable *) dst;
12695943634fSDavid S. Miller 	unsigned int mtu = rt->rt_pmtu;
12705943634fSDavid S. Miller 
127198d75c37SAlexander Duyck 	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
12725943634fSDavid S. Miller 		mtu = dst_metric_raw(dst, RTAX_MTU);
1273618f9bc7SSteffen Klassert 
127438d523e2SSteffen Klassert 	if (mtu)
1275618f9bc7SSteffen Klassert 		return mtu;
1276618f9bc7SSteffen Klassert 
1277c780a049SEric Dumazet 	mtu = READ_ONCE(dst->dev->mtu);
1278d33e4553SDavid S. Miller 
1279d52e5a7eSSabrina Dubroca 	if (unlikely(ip_mtu_locked(dst))) {
1280155e8336SJulian Anastasov 		if (rt->rt_uses_gateway && mtu > 576)
1281d33e4553SDavid S. Miller 			mtu = 576;
1282d33e4553SDavid S. Miller 	}
1283d33e4553SDavid S. Miller 
128414972cbdSRoopa Prabhu 	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
128514972cbdSRoopa Prabhu 
128614972cbdSRoopa Prabhu 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1287d33e4553SDavid S. Miller }
1288d33e4553SDavid S. Miller 
128994720e3aSJulian Anastasov static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
129094720e3aSJulian Anastasov {
129194720e3aSJulian Anastasov 	struct fnhe_hash_bucket *hash;
129294720e3aSJulian Anastasov 	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
129394720e3aSJulian Anastasov 	u32 hval = fnhe_hashfun(daddr);
129494720e3aSJulian Anastasov 
129594720e3aSJulian Anastasov 	spin_lock_bh(&fnhe_lock);
129694720e3aSJulian Anastasov 
129794720e3aSJulian Anastasov 	hash = rcu_dereference_protected(nh->nh_exceptions,
129894720e3aSJulian Anastasov 					 lockdep_is_held(&fnhe_lock));
129994720e3aSJulian Anastasov 	hash += hval;
130094720e3aSJulian Anastasov 
130194720e3aSJulian Anastasov 	fnhe_p = &hash->chain;
130294720e3aSJulian Anastasov 	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
130394720e3aSJulian Anastasov 	while (fnhe) {
130494720e3aSJulian Anastasov 		if (fnhe->fnhe_daddr == daddr) {
130594720e3aSJulian Anastasov 			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
130694720e3aSJulian Anastasov 				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
130794720e3aSJulian Anastasov 			fnhe_flush_routes(fnhe);
130894720e3aSJulian Anastasov 			kfree_rcu(fnhe, rcu);
130994720e3aSJulian Anastasov 			break;
131094720e3aSJulian Anastasov 		}
131194720e3aSJulian Anastasov 		fnhe_p = &fnhe->fnhe_next;
131294720e3aSJulian Anastasov 		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
131394720e3aSJulian Anastasov 						 lockdep_is_held(&fnhe_lock));
131494720e3aSJulian Anastasov 	}
131594720e3aSJulian Anastasov 
131694720e3aSJulian Anastasov 	spin_unlock_bh(&fnhe_lock);
131794720e3aSJulian Anastasov }
131894720e3aSJulian Anastasov 
1319f2bb4bedSDavid S. Miller static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
13204895c771SDavid S. Miller {
1321caa41527SEric Dumazet 	struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
13224895c771SDavid S. Miller 	struct fib_nh_exception *fnhe;
13234895c771SDavid S. Miller 	u32 hval;
13244895c771SDavid S. Miller 
1325f2bb4bedSDavid S. Miller 	if (!hash)
1326f2bb4bedSDavid S. Miller 		return NULL;
1327f2bb4bedSDavid S. Miller 
1328d3a25c98SDavid S. Miller 	hval = fnhe_hashfun(daddr);
13294895c771SDavid S. Miller 
13304895c771SDavid S. Miller 	for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
13314895c771SDavid S. Miller 	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
133294720e3aSJulian Anastasov 		if (fnhe->fnhe_daddr == daddr) {
133394720e3aSJulian Anastasov 			if (fnhe->fnhe_expires &&
133494720e3aSJulian Anastasov 			    time_after(jiffies, fnhe->fnhe_expires)) {
133594720e3aSJulian Anastasov 				ip_del_fnhe(nh, daddr);
133694720e3aSJulian Anastasov 				break;
133794720e3aSJulian Anastasov 			}
1338f2bb4bedSDavid S. Miller 			return fnhe;
1339f2bb4bedSDavid S. Miller 		}
134094720e3aSJulian Anastasov 	}
1341f2bb4bedSDavid S. Miller 	return NULL;
1342f2bb4bedSDavid S. Miller }
1343f2bb4bedSDavid S. Miller 
134450d889b1SDavid Ahern /* MTU selection:
134550d889b1SDavid Ahern  * 1. mtu on route is locked - use it
134650d889b1SDavid Ahern  * 2. mtu from nexthop exception
134750d889b1SDavid Ahern  * 3. mtu from egress device
134850d889b1SDavid Ahern  */
134950d889b1SDavid Ahern 
135050d889b1SDavid Ahern u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
135150d889b1SDavid Ahern {
135250d889b1SDavid Ahern 	struct fib_info *fi = res->fi;
135350d889b1SDavid Ahern 	struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
135450d889b1SDavid Ahern 	struct net_device *dev = nh->nh_dev;
135550d889b1SDavid Ahern 	u32 mtu = 0;
135650d889b1SDavid Ahern 
135750d889b1SDavid Ahern 	if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
135850d889b1SDavid Ahern 	    fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
135950d889b1SDavid Ahern 		mtu = fi->fib_mtu;
136050d889b1SDavid Ahern 
136150d889b1SDavid Ahern 	if (likely(!mtu)) {
136250d889b1SDavid Ahern 		struct fib_nh_exception *fnhe;
136350d889b1SDavid Ahern 
136450d889b1SDavid Ahern 		fnhe = find_exception(nh, daddr);
136550d889b1SDavid Ahern 		if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
136650d889b1SDavid Ahern 			mtu = fnhe->fnhe_pmtu;
136750d889b1SDavid Ahern 	}
136850d889b1SDavid Ahern 
136950d889b1SDavid Ahern 	if (likely(!mtu))
137050d889b1SDavid Ahern 		mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
137150d889b1SDavid Ahern 
137250d889b1SDavid Ahern 	return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
137350d889b1SDavid Ahern }
137450d889b1SDavid Ahern 
1375caacf05eSDavid S. Miller static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1376a4c2fd7fSWei Wang 			      __be32 daddr, const bool do_cache)
1377f2bb4bedSDavid S. Miller {
1378caacf05eSDavid S. Miller 	bool ret = false;
1379caacf05eSDavid S. Miller 
1380c5038a83SDavid S. Miller 	spin_lock_bh(&fnhe_lock);
1381aee06da6SJulian Anastasov 
1382c5038a83SDavid S. Miller 	if (daddr == fnhe->fnhe_daddr) {
13832ffae99dSTimo Teräs 		struct rtable __rcu **porig;
13842ffae99dSTimo Teräs 		struct rtable *orig;
13855aad1de5STimo Teräs 		int genid = fnhe_genid(dev_net(rt->dst.dev));
13862ffae99dSTimo Teräs 
13872ffae99dSTimo Teräs 		if (rt_is_input_route(rt))
13882ffae99dSTimo Teräs 			porig = &fnhe->fnhe_rth_input;
13892ffae99dSTimo Teräs 		else
13902ffae99dSTimo Teräs 			porig = &fnhe->fnhe_rth_output;
13912ffae99dSTimo Teräs 		orig = rcu_dereference(*porig);
13925aad1de5STimo Teräs 
13935aad1de5STimo Teräs 		if (fnhe->fnhe_genid != genid) {
13945aad1de5STimo Teräs 			fnhe->fnhe_genid = genid;
139513d82bf5SSteffen Klassert 			fnhe->fnhe_gw = 0;
139613d82bf5SSteffen Klassert 			fnhe->fnhe_pmtu = 0;
139713d82bf5SSteffen Klassert 			fnhe->fnhe_expires = 0;
13980e8411e4SHangbin Liu 			fnhe->fnhe_mtu_locked = false;
13992ffae99dSTimo Teräs 			fnhe_flush_routes(fnhe);
14002ffae99dSTimo Teräs 			orig = NULL;
140113d82bf5SSteffen Klassert 		}
1402387aa65aSTimo Teräs 		fill_route_from_fnhe(rt, fnhe);
1403387aa65aSTimo Teräs 		if (!rt->rt_gateway)
1404155e8336SJulian Anastasov 			rt->rt_gateway = daddr;
1405f2bb4bedSDavid S. Miller 
1406a4c2fd7fSWei Wang 		if (do_cache) {
14070830106cSWei Wang 			dst_hold(&rt->dst);
14082ffae99dSTimo Teräs 			rcu_assign_pointer(*porig, rt);
14090830106cSWei Wang 			if (orig) {
141095c47f9cSWei Wang 				dst_dev_put(&orig->dst);
14110830106cSWei Wang 				dst_release(&orig->dst);
14120830106cSWei Wang 			}
14132ffae99dSTimo Teräs 			ret = true;
14142ffae99dSTimo Teräs 		}
1415c5038a83SDavid S. Miller 
1416c5038a83SDavid S. Miller 		fnhe->fnhe_stamp = jiffies;
1417c5038a83SDavid S. Miller 	}
1418c5038a83SDavid S. Miller 	spin_unlock_bh(&fnhe_lock);
1419caacf05eSDavid S. Miller 
1420caacf05eSDavid S. Miller 	return ret;
142154764bb6SEric Dumazet }
142254764bb6SEric Dumazet 
1423caacf05eSDavid S. Miller static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1424f2bb4bedSDavid S. Miller {
1425d26b3a7cSEric Dumazet 	struct rtable *orig, *prev, **p;
1426caacf05eSDavid S. Miller 	bool ret = true;
1427f2bb4bedSDavid S. Miller 
1428d26b3a7cSEric Dumazet 	if (rt_is_input_route(rt)) {
142954764bb6SEric Dumazet 		p = (struct rtable **)&nh->nh_rth_input;
1430d26b3a7cSEric Dumazet 	} else {
1431903ceff7SChristoph Lameter 		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
1432d26b3a7cSEric Dumazet 	}
1433f2bb4bedSDavid S. Miller 	orig = *p;
1434f2bb4bedSDavid S. Miller 
14350830106cSWei Wang 	/* hold dst before doing cmpxchg() to avoid race condition
14360830106cSWei Wang 	 * on this dst
14370830106cSWei Wang 	 */
14380830106cSWei Wang 	dst_hold(&rt->dst);
1439f2bb4bedSDavid S. Miller 	prev = cmpxchg(p, orig, rt);
1440f2bb4bedSDavid S. Miller 	if (prev == orig) {
14410830106cSWei Wang 		if (orig) {
144295c47f9cSWei Wang 			dst_dev_put(&orig->dst);
14430830106cSWei Wang 			dst_release(&orig->dst);
14440830106cSWei Wang 		}
14450830106cSWei Wang 	} else {
14460830106cSWei Wang 		dst_release(&rt->dst);
1447caacf05eSDavid S. Miller 		ret = false;
14480830106cSWei Wang 	}
1449caacf05eSDavid S. Miller 
1450caacf05eSDavid S. Miller 	return ret;
1451caacf05eSDavid S. Miller }
1452caacf05eSDavid S. Miller 
14535055c371SEric Dumazet struct uncached_list {
14545055c371SEric Dumazet 	spinlock_t		lock;
14555055c371SEric Dumazet 	struct list_head	head;
14565055c371SEric Dumazet };
14575055c371SEric Dumazet 
14585055c371SEric Dumazet static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
1459caacf05eSDavid S. Miller 
1460510c321bSXin Long void rt_add_uncached_list(struct rtable *rt)
1461caacf05eSDavid S. Miller {
14625055c371SEric Dumazet 	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
14635055c371SEric Dumazet 
14645055c371SEric Dumazet 	rt->rt_uncached_list = ul;
14655055c371SEric Dumazet 
14665055c371SEric Dumazet 	spin_lock_bh(&ul->lock);
14675055c371SEric Dumazet 	list_add_tail(&rt->rt_uncached, &ul->head);
14685055c371SEric Dumazet 	spin_unlock_bh(&ul->lock);
1469caacf05eSDavid S. Miller }
1470caacf05eSDavid S. Miller 
1471510c321bSXin Long void rt_del_uncached_list(struct rtable *rt)
1472510c321bSXin Long {
1473510c321bSXin Long 	if (!list_empty(&rt->rt_uncached)) {
1474510c321bSXin Long 		struct uncached_list *ul = rt->rt_uncached_list;
1475510c321bSXin Long 
1476510c321bSXin Long 		spin_lock_bh(&ul->lock);
1477510c321bSXin Long 		list_del(&rt->rt_uncached);
1478510c321bSXin Long 		spin_unlock_bh(&ul->lock);
1479510c321bSXin Long 	}
1480510c321bSXin Long }
1481510c321bSXin Long 
1482caacf05eSDavid S. Miller static void ipv4_dst_destroy(struct dst_entry *dst)
1483caacf05eSDavid S. Miller {
14843fb07dafSEric Dumazet 	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
1485caacf05eSDavid S. Miller 	struct rtable *rt = (struct rtable *)dst;
1486caacf05eSDavid S. Miller 
14879620fef2SEric Dumazet 	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
14883fb07dafSEric Dumazet 		kfree(p);
14893fb07dafSEric Dumazet 
1490510c321bSXin Long 	rt_del_uncached_list(rt);
1491caacf05eSDavid S. Miller }
1492caacf05eSDavid S. Miller 
1493caacf05eSDavid S. Miller void rt_flush_dev(struct net_device *dev)
1494caacf05eSDavid S. Miller {
1495caacf05eSDavid S. Miller 	struct net *net = dev_net(dev);
1496caacf05eSDavid S. Miller 	struct rtable *rt;
14975055c371SEric Dumazet 	int cpu;
1498caacf05eSDavid S. Miller 
14995055c371SEric Dumazet 	for_each_possible_cpu(cpu) {
15005055c371SEric Dumazet 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
15015055c371SEric Dumazet 
15025055c371SEric Dumazet 		spin_lock_bh(&ul->lock);
15035055c371SEric Dumazet 		list_for_each_entry(rt, &ul->head, rt_uncached) {
1504caacf05eSDavid S. Miller 			if (rt->dst.dev != dev)
1505caacf05eSDavid S. Miller 				continue;
1506caacf05eSDavid S. Miller 			rt->dst.dev = net->loopback_dev;
1507caacf05eSDavid S. Miller 			dev_hold(rt->dst.dev);
1508caacf05eSDavid S. Miller 			dev_put(dev);
1509caacf05eSDavid S. Miller 		}
15105055c371SEric Dumazet 		spin_unlock_bh(&ul->lock);
15114895c771SDavid S. Miller 	}
15124895c771SDavid S. Miller }
15134895c771SDavid S. Miller 
15144331debcSEric Dumazet static bool rt_cache_valid(const struct rtable *rt)
1515d2d68ba9SDavid S. Miller {
15164331debcSEric Dumazet 	return	rt &&
15174331debcSEric Dumazet 		rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
15184331debcSEric Dumazet 		!rt_is_expired(rt);
1519d2d68ba9SDavid S. Miller }
1520d2d68ba9SDavid S. Miller 
1521f2bb4bedSDavid S. Miller static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
15225e2b61f7SDavid S. Miller 			   const struct fib_result *res,
1523f2bb4bedSDavid S. Miller 			   struct fib_nh_exception *fnhe,
1524a4c2fd7fSWei Wang 			   struct fib_info *fi, u16 type, u32 itag,
1525a4c2fd7fSWei Wang 			   const bool do_cache)
15261da177e4SLinus Torvalds {
1527caacf05eSDavid S. Miller 	bool cached = false;
1528caacf05eSDavid S. Miller 
15291da177e4SLinus Torvalds 	if (fi) {
15304895c771SDavid S. Miller 		struct fib_nh *nh = &FIB_RES_NH(*res);
15314895c771SDavid S. Miller 
1532155e8336SJulian Anastasov 		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
15334895c771SDavid S. Miller 			rt->rt_gateway = nh->nh_gw;
1534155e8336SJulian Anastasov 			rt->rt_uses_gateway = 1;
1535155e8336SJulian Anastasov 		}
15363fb07dafSEric Dumazet 		dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
15373fb07dafSEric Dumazet 		if (fi->fib_metrics != &dst_default_metrics) {
15383fb07dafSEric Dumazet 			rt->dst._metrics |= DST_METRICS_REFCOUNTED;
15399620fef2SEric Dumazet 			refcount_inc(&fi->fib_metrics->refcnt);
15403fb07dafSEric Dumazet 		}
1541c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
1542f2bb4bedSDavid S. Miller 		rt->dst.tclassid = nh->nh_tclassid;
15431da177e4SLinus Torvalds #endif
154461adedf3SJiri Benc 		rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
1545c5038a83SDavid S. Miller 		if (unlikely(fnhe))
1546a4c2fd7fSWei Wang 			cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
1547a4c2fd7fSWei Wang 		else if (do_cache)
1548caacf05eSDavid S. Miller 			cached = rt_cache_route(nh, rt);
1549155e8336SJulian Anastasov 		if (unlikely(!cached)) {
1550155e8336SJulian Anastasov 			/* Routes we intend to cache in nexthop exception or
1551155e8336SJulian Anastasov 			 * FIB nexthop have the DST_NOCACHE bit clear.
1552155e8336SJulian Anastasov 			 * However, if we are unsuccessful at storing this
1553155e8336SJulian Anastasov 			 * route into the cache we really need to set it.
1554155e8336SJulian Anastasov 			 */
1555155e8336SJulian Anastasov 			if (!rt->rt_gateway)
1556155e8336SJulian Anastasov 				rt->rt_gateway = daddr;
1557155e8336SJulian Anastasov 			rt_add_uncached_list(rt);
1558d33e4553SDavid S. Miller 		}
1559155e8336SJulian Anastasov 	} else
1560caacf05eSDavid S. Miller 		rt_add_uncached_list(rt);
15611da177e4SLinus Torvalds 
1562c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
15631da177e4SLinus Torvalds #ifdef CONFIG_IP_MULTIPLE_TABLES
156485b91b03SDavid S. Miller 	set_class_tag(rt, res->tclassid);
15651da177e4SLinus Torvalds #endif
15661da177e4SLinus Torvalds 	set_class_tag(rt, itag);
15671da177e4SLinus Torvalds #endif
15681da177e4SLinus Torvalds }
15691da177e4SLinus Torvalds 
15709ab179d8SDavid Ahern struct rtable *rt_dst_alloc(struct net_device *dev,
1571d08c4f35SDavid Ahern 			    unsigned int flags, u16 type,
1572f2bb4bedSDavid S. Miller 			    bool nopolicy, bool noxfrm, bool will_cache)
15730c4dcd58SDavid S. Miller {
1574d08c4f35SDavid Ahern 	struct rtable *rt;
1575d08c4f35SDavid Ahern 
1576d08c4f35SDavid Ahern 	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
1577a4c2fd7fSWei Wang 		       (will_cache ? 0 : DST_HOST) |
15780c4dcd58SDavid S. Miller 		       (nopolicy ? DST_NOPOLICY : 0) |
1579b2a9c0edSWei Wang 		       (noxfrm ? DST_NOXFRM : 0));
1580d08c4f35SDavid Ahern 
1581d08c4f35SDavid Ahern 	if (rt) {
1582d08c4f35SDavid Ahern 		rt->rt_genid = rt_genid_ipv4(dev_net(dev));
1583d08c4f35SDavid Ahern 		rt->rt_flags = flags;
1584d08c4f35SDavid Ahern 		rt->rt_type = type;
1585d08c4f35SDavid Ahern 		rt->rt_is_input = 0;
1586d08c4f35SDavid Ahern 		rt->rt_iif = 0;
1587d08c4f35SDavid Ahern 		rt->rt_pmtu = 0;
1588d52e5a7eSSabrina Dubroca 		rt->rt_mtu_locked = 0;
1589d08c4f35SDavid Ahern 		rt->rt_gateway = 0;
1590d08c4f35SDavid Ahern 		rt->rt_uses_gateway = 0;
1591d08c4f35SDavid Ahern 		INIT_LIST_HEAD(&rt->rt_uncached);
1592d08c4f35SDavid Ahern 
1593d08c4f35SDavid Ahern 		rt->dst.output = ip_output;
1594d08c4f35SDavid Ahern 		if (flags & RTCF_LOCAL)
1595d08c4f35SDavid Ahern 			rt->dst.input = ip_local_deliver;
1596d08c4f35SDavid Ahern 	}
1597d08c4f35SDavid Ahern 
1598d08c4f35SDavid Ahern 	return rt;
15990c4dcd58SDavid S. Miller }
16009ab179d8SDavid Ahern EXPORT_SYMBOL(rt_dst_alloc);
16010c4dcd58SDavid S. Miller 
160296d36220SEric Dumazet /* called in rcu_read_lock() section */
1603bc044e8dSPaolo Abeni int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1604bc044e8dSPaolo Abeni 			  u8 tos, struct net_device *dev,
1605bc044e8dSPaolo Abeni 			  struct in_device *in_dev, u32 *itag)
16061da177e4SLinus Torvalds {
1607b5f7e755SEric Dumazet 	int err;
16081da177e4SLinus Torvalds 
16091da177e4SLinus Torvalds 	/* Primary sanity checks. */
161051456b29SIan Morris 	if (!in_dev)
16111da177e4SLinus Torvalds 		return -EINVAL;
16121da177e4SLinus Torvalds 
16131e637c74SJan Engelhardt 	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1614d0daebc3SThomas Graf 	    skb->protocol != htons(ETH_P_IP))
1615bc044e8dSPaolo Abeni 		return -EINVAL;
1616d0daebc3SThomas Graf 
161775fea73dSAlexander Duyck 	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
1618bc044e8dSPaolo Abeni 		return -EINVAL;
16191da177e4SLinus Torvalds 
1620f97c1e0cSJoe Perches 	if (ipv4_is_zeronet(saddr)) {
1621f97c1e0cSJoe Perches 		if (!ipv4_is_local_multicast(daddr))
1622bc044e8dSPaolo Abeni 			return -EINVAL;
1623b5f7e755SEric Dumazet 	} else {
16249e56e380SDavid S. Miller 		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1625bc044e8dSPaolo Abeni 					  in_dev, itag);
1626b5f7e755SEric Dumazet 		if (err < 0)
1627bc044e8dSPaolo Abeni 			return err;
1628b5f7e755SEric Dumazet 	}
1629bc044e8dSPaolo Abeni 	return 0;
1630bc044e8dSPaolo Abeni }
1631bc044e8dSPaolo Abeni 
1632bc044e8dSPaolo Abeni /* called in rcu_read_lock() section */
1633bc044e8dSPaolo Abeni static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1634bc044e8dSPaolo Abeni 			     u8 tos, struct net_device *dev, int our)
1635bc044e8dSPaolo Abeni {
1636bc044e8dSPaolo Abeni 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1637bc044e8dSPaolo Abeni 	unsigned int flags = RTCF_MULTICAST;
1638bc044e8dSPaolo Abeni 	struct rtable *rth;
1639bc044e8dSPaolo Abeni 	u32 itag = 0;
1640bc044e8dSPaolo Abeni 	int err;
1641bc044e8dSPaolo Abeni 
1642bc044e8dSPaolo Abeni 	err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
1643bc044e8dSPaolo Abeni 	if (err)
1644bc044e8dSPaolo Abeni 		return err;
1645bc044e8dSPaolo Abeni 
1646d08c4f35SDavid Ahern 	if (our)
1647d08c4f35SDavid Ahern 		flags |= RTCF_LOCAL;
1648d08c4f35SDavid Ahern 
1649d08c4f35SDavid Ahern 	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
1650f2bb4bedSDavid S. Miller 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
16511da177e4SLinus Torvalds 	if (!rth)
1652bc044e8dSPaolo Abeni 		return -ENOBUFS;
16531da177e4SLinus Torvalds 
1654c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
1655d8d1f30bSChangli Gao 	rth->dst.tclassid = itag;
16561da177e4SLinus Torvalds #endif
1657cf911662SDavid S. Miller 	rth->dst.output = ip_rt_bug;
16589917e1e8SDavid S. Miller 	rth->rt_is_input= 1;
16591da177e4SLinus Torvalds 
16601da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE
1661f97c1e0cSJoe Perches 	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
1662d8d1f30bSChangli Gao 		rth->dst.input = ip_mr_input;
16631da177e4SLinus Torvalds #endif
16641da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(in_slow_mc);
16651da177e4SLinus Torvalds 
166689aef892SDavid S. Miller 	skb_dst_set(skb, &rth->dst);
166789aef892SDavid S. Miller 	return 0;
16681da177e4SLinus Torvalds }
16691da177e4SLinus Torvalds 
16701da177e4SLinus Torvalds 
16711da177e4SLinus Torvalds static void ip_handle_martian_source(struct net_device *dev,
16721da177e4SLinus Torvalds 				     struct in_device *in_dev,
16731da177e4SLinus Torvalds 				     struct sk_buff *skb,
16749e12bb22SAl Viro 				     __be32 daddr,
16759e12bb22SAl Viro 				     __be32 saddr)
16761da177e4SLinus Torvalds {
16771da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(in_martian_src);
16781da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE
16791da177e4SLinus Torvalds 	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
16801da177e4SLinus Torvalds 		/*
16811da177e4SLinus Torvalds 		 *	RFC1812 recommendation, if source is martian,
16821da177e4SLinus Torvalds 		 *	the only hint is MAC header.
16831da177e4SLinus Torvalds 		 */
1684058bd4d2SJoe Perches 		pr_warn("martian source %pI4 from %pI4, on dev %s\n",
1685673d57e7SHarvey Harrison 			&daddr, &saddr, dev->name);
168698e399f8SArnaldo Carvalho de Melo 		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
1687058bd4d2SJoe Perches 			print_hex_dump(KERN_WARNING, "ll header: ",
1688058bd4d2SJoe Perches 				       DUMP_PREFIX_OFFSET, 16, 1,
1689058bd4d2SJoe Perches 				       skb_mac_header(skb),
1690058bd4d2SJoe Perches 				       dev->hard_header_len, true);
16911da177e4SLinus Torvalds 		}
16921da177e4SLinus Torvalds 	}
16931da177e4SLinus Torvalds #endif
16941da177e4SLinus Torvalds }
16951da177e4SLinus Torvalds 
169647360228SEric Dumazet /* called in rcu_read_lock() section */
16975969f71dSStephen Hemminger static int __mkroute_input(struct sk_buff *skb,
1698982721f3SDavid S. Miller 			   const struct fib_result *res,
16991da177e4SLinus Torvalds 			   struct in_device *in_dev,
1700c6cffba4SDavid S. Miller 			   __be32 daddr, __be32 saddr, u32 tos)
17011da177e4SLinus Torvalds {
17022ffae99dSTimo Teräs 	struct fib_nh_exception *fnhe;
17031da177e4SLinus Torvalds 	struct rtable *rth;
17041da177e4SLinus Torvalds 	int err;
17051da177e4SLinus Torvalds 	struct in_device *out_dev;
1706d2d68ba9SDavid S. Miller 	bool do_cache;
1707fbdc0ad0SLi RongQing 	u32 itag = 0;
17081da177e4SLinus Torvalds 
17091da177e4SLinus Torvalds 	/* get a working reference to the output device */
171047360228SEric Dumazet 	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
171151456b29SIan Morris 	if (!out_dev) {
1712e87cc472SJoe Perches 		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
17131da177e4SLinus Torvalds 		return -EINVAL;
17141da177e4SLinus Torvalds 	}
17151da177e4SLinus Torvalds 
17165c04c819SMichael Smith 	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
17179e56e380SDavid S. Miller 				  in_dev->dev, in_dev, &itag);
17181da177e4SLinus Torvalds 	if (err < 0) {
17191da177e4SLinus Torvalds 		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
17201da177e4SLinus Torvalds 					 saddr);
17211da177e4SLinus Torvalds 
17221da177e4SLinus Torvalds 		goto cleanup;
17231da177e4SLinus Torvalds 	}
17241da177e4SLinus Torvalds 
1725e81da0e1SJulian Anastasov 	do_cache = res->fi && !itag;
1726e81da0e1SJulian Anastasov 	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
1727df4d9254SHannes Frederic Sowa 	    skb->protocol == htons(ETH_P_IP) &&
17281da177e4SLinus Torvalds 	    (IN_DEV_SHARED_MEDIA(out_dev) ||
1729df4d9254SHannes Frederic Sowa 	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1730df4d9254SHannes Frederic Sowa 		IPCB(skb)->flags |= IPSKB_DOREDIRECT;
17311da177e4SLinus Torvalds 
17321da177e4SLinus Torvalds 	if (skb->protocol != htons(ETH_P_IP)) {
17331da177e4SLinus Torvalds 		/* Not IP (i.e. ARP). Do not create route, if it is
17341da177e4SLinus Torvalds 		 * invalid for proxy arp. DNAT routes are always valid.
173565324144SJesper Dangaard Brouer 		 *
173665324144SJesper Dangaard Brouer 		 * Proxy arp feature have been extended to allow, ARP
173765324144SJesper Dangaard Brouer 		 * replies back to the same interface, to support
173865324144SJesper Dangaard Brouer 		 * Private VLAN switch technologies. See arp.c.
17391da177e4SLinus Torvalds 		 */
174065324144SJesper Dangaard Brouer 		if (out_dev == in_dev &&
174165324144SJesper Dangaard Brouer 		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
17421da177e4SLinus Torvalds 			err = -EINVAL;
17431da177e4SLinus Torvalds 			goto cleanup;
17441da177e4SLinus Torvalds 		}
17451da177e4SLinus Torvalds 	}
17461da177e4SLinus Torvalds 
17472ffae99dSTimo Teräs 	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
1748e81da0e1SJulian Anastasov 	if (do_cache) {
174994720e3aSJulian Anastasov 		if (fnhe)
17502ffae99dSTimo Teräs 			rth = rcu_dereference(fnhe->fnhe_rth_input);
175194720e3aSJulian Anastasov 		else
175254764bb6SEric Dumazet 			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1753d2d68ba9SDavid S. Miller 		if (rt_cache_valid(rth)) {
1754c6cffba4SDavid S. Miller 			skb_dst_set_noref(skb, &rth->dst);
1755d2d68ba9SDavid S. Miller 			goto out;
1756d2d68ba9SDavid S. Miller 		}
1757d2d68ba9SDavid S. Miller 	}
1758f2bb4bedSDavid S. Miller 
1759d08c4f35SDavid Ahern 	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
17605c1e6aa3SDavid S. Miller 			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
1761d2d68ba9SDavid S. Miller 			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
17621da177e4SLinus Torvalds 	if (!rth) {
17631da177e4SLinus Torvalds 		err = -ENOBUFS;
17641da177e4SLinus Torvalds 		goto cleanup;
17651da177e4SLinus Torvalds 	}
17661da177e4SLinus Torvalds 
17679917e1e8SDavid S. Miller 	rth->rt_is_input = 1;
1768a6254864SDuan Jiong 	RT_CACHE_STAT_INC(in_slow_tot);
17691da177e4SLinus Torvalds 
1770d8d1f30bSChangli Gao 	rth->dst.input = ip_forward;
17711da177e4SLinus Torvalds 
1772a4c2fd7fSWei Wang 	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
1773a4c2fd7fSWei Wang 		       do_cache);
17749942895bSDavid Ahern 	lwtunnel_set_redirect(&rth->dst);
1775c6cffba4SDavid S. Miller 	skb_dst_set(skb, &rth->dst);
1776d2d68ba9SDavid S. Miller out:
17771da177e4SLinus Torvalds 	err = 0;
17781da177e4SLinus Torvalds  cleanup:
17791da177e4SLinus Torvalds 	return err;
17801da177e4SLinus Torvalds }
17811da177e4SLinus Torvalds 
178279a13159SPeter Nørlund #ifdef CONFIG_IP_ROUTE_MULTIPATH
178379a13159SPeter Nørlund /* To make ICMP packets follow the right flow, the multipath hash is
1784bf4e0a3dSNikolay Aleksandrov  * calculated from the inner IP addresses.
178579a13159SPeter Nørlund  */
1786bf4e0a3dSNikolay Aleksandrov static void ip_multipath_l3_keys(const struct sk_buff *skb,
1787bf4e0a3dSNikolay Aleksandrov 				 struct flow_keys *hash_keys)
178879a13159SPeter Nørlund {
178979a13159SPeter Nørlund 	const struct iphdr *outer_iph = ip_hdr(skb);
17906f74b6c2SDavid Ahern 	const struct iphdr *key_iph = outer_iph;
1791bf4e0a3dSNikolay Aleksandrov 	const struct iphdr *inner_iph;
179279a13159SPeter Nørlund 	const struct icmphdr *icmph;
179379a13159SPeter Nørlund 	struct iphdr _inner_iph;
1794bf4e0a3dSNikolay Aleksandrov 	struct icmphdr _icmph;
1795bf4e0a3dSNikolay Aleksandrov 
1796bf4e0a3dSNikolay Aleksandrov 	if (likely(outer_iph->protocol != IPPROTO_ICMP))
17976f74b6c2SDavid Ahern 		goto out;
179879a13159SPeter Nørlund 
179979a13159SPeter Nørlund 	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
18006f74b6c2SDavid Ahern 		goto out;
180179a13159SPeter Nørlund 
180279a13159SPeter Nørlund 	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
180379a13159SPeter Nørlund 				   &_icmph);
180479a13159SPeter Nørlund 	if (!icmph)
18056f74b6c2SDavid Ahern 		goto out;
180679a13159SPeter Nørlund 
180779a13159SPeter Nørlund 	if (icmph->type != ICMP_DEST_UNREACH &&
180879a13159SPeter Nørlund 	    icmph->type != ICMP_REDIRECT &&
180979a13159SPeter Nørlund 	    icmph->type != ICMP_TIME_EXCEEDED &&
1810bf4e0a3dSNikolay Aleksandrov 	    icmph->type != ICMP_PARAMETERPROB)
18116f74b6c2SDavid Ahern 		goto out;
181279a13159SPeter Nørlund 
181379a13159SPeter Nørlund 	inner_iph = skb_header_pointer(skb,
181479a13159SPeter Nørlund 				       outer_iph->ihl * 4 + sizeof(_icmph),
181579a13159SPeter Nørlund 				       sizeof(_inner_iph), &_inner_iph);
181679a13159SPeter Nørlund 	if (!inner_iph)
18176f74b6c2SDavid Ahern 		goto out;
18186f74b6c2SDavid Ahern 
18196f74b6c2SDavid Ahern 	key_iph = inner_iph;
18206f74b6c2SDavid Ahern out:
18216f74b6c2SDavid Ahern 	hash_keys->addrs.v4addrs.src = key_iph->saddr;
18226f74b6c2SDavid Ahern 	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
182379a13159SPeter Nørlund }
182479a13159SPeter Nørlund 
1825bf4e0a3dSNikolay Aleksandrov /* if skb is set it will be used and fl4 can be NULL */
18267efc0b6bSDavid Ahern int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
1827e37b1e97SRoopa Prabhu 		       const struct sk_buff *skb, struct flow_keys *flkeys)
1828bf4e0a3dSNikolay Aleksandrov {
1829bf4e0a3dSNikolay Aleksandrov 	struct flow_keys hash_keys;
1830bf4e0a3dSNikolay Aleksandrov 	u32 mhash;
1831bf4e0a3dSNikolay Aleksandrov 
1832bf4e0a3dSNikolay Aleksandrov 	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
1833bf4e0a3dSNikolay Aleksandrov 	case 0:
1834bf4e0a3dSNikolay Aleksandrov 		memset(&hash_keys, 0, sizeof(hash_keys));
1835bf4e0a3dSNikolay Aleksandrov 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1836bf4e0a3dSNikolay Aleksandrov 		if (skb) {
1837bf4e0a3dSNikolay Aleksandrov 			ip_multipath_l3_keys(skb, &hash_keys);
1838bf4e0a3dSNikolay Aleksandrov 		} else {
1839bf4e0a3dSNikolay Aleksandrov 			hash_keys.addrs.v4addrs.src = fl4->saddr;
1840bf4e0a3dSNikolay Aleksandrov 			hash_keys.addrs.v4addrs.dst = fl4->daddr;
1841bf4e0a3dSNikolay Aleksandrov 		}
1842bf4e0a3dSNikolay Aleksandrov 		break;
1843bf4e0a3dSNikolay Aleksandrov 	case 1:
1844bf4e0a3dSNikolay Aleksandrov 		/* skb is currently provided only when forwarding */
1845bf4e0a3dSNikolay Aleksandrov 		if (skb) {
1846bf4e0a3dSNikolay Aleksandrov 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1847bf4e0a3dSNikolay Aleksandrov 			struct flow_keys keys;
1848bf4e0a3dSNikolay Aleksandrov 
1849bf4e0a3dSNikolay Aleksandrov 			/* short-circuit if we already have L4 hash present */
1850bf4e0a3dSNikolay Aleksandrov 			if (skb->l4_hash)
1851bf4e0a3dSNikolay Aleksandrov 				return skb_get_hash_raw(skb) >> 1;
1852ec7127a5SDavid Ahern 
1853bf4e0a3dSNikolay Aleksandrov 			memset(&hash_keys, 0, sizeof(hash_keys));
18541fe4b118SDavid Ahern 
1855ec7127a5SDavid Ahern 			if (!flkeys) {
1856ec7127a5SDavid Ahern 				skb_flow_dissect_flow_keys(skb, &keys, flag);
1857ec7127a5SDavid Ahern 				flkeys = &keys;
1858ec7127a5SDavid Ahern 			}
1859ec7127a5SDavid Ahern 
1860e37b1e97SRoopa Prabhu 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1861e37b1e97SRoopa Prabhu 			hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
1862e37b1e97SRoopa Prabhu 			hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
1863e37b1e97SRoopa Prabhu 			hash_keys.ports.src = flkeys->ports.src;
1864e37b1e97SRoopa Prabhu 			hash_keys.ports.dst = flkeys->ports.dst;
1865e37b1e97SRoopa Prabhu 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
1866e37b1e97SRoopa Prabhu 		} else {
1867bf4e0a3dSNikolay Aleksandrov 			memset(&hash_keys, 0, sizeof(hash_keys));
1868bf4e0a3dSNikolay Aleksandrov 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1869bf4e0a3dSNikolay Aleksandrov 			hash_keys.addrs.v4addrs.src = fl4->saddr;
1870bf4e0a3dSNikolay Aleksandrov 			hash_keys.addrs.v4addrs.dst = fl4->daddr;
1871bf4e0a3dSNikolay Aleksandrov 			hash_keys.ports.src = fl4->fl4_sport;
1872bf4e0a3dSNikolay Aleksandrov 			hash_keys.ports.dst = fl4->fl4_dport;
1873bf4e0a3dSNikolay Aleksandrov 			hash_keys.basic.ip_proto = fl4->flowi4_proto;
1874bf4e0a3dSNikolay Aleksandrov 		}
1875bf4e0a3dSNikolay Aleksandrov 		break;
1876bf4e0a3dSNikolay Aleksandrov 	}
1877bf4e0a3dSNikolay Aleksandrov 	mhash = flow_hash_from_keys(&hash_keys);
1878bf4e0a3dSNikolay Aleksandrov 
1879bf4e0a3dSNikolay Aleksandrov 	return mhash >> 1;
1880bf4e0a3dSNikolay Aleksandrov }
188179a13159SPeter Nørlund #endif /* CONFIG_IP_ROUTE_MULTIPATH */
188279a13159SPeter Nørlund 
18835969f71dSStephen Hemminger static int ip_mkroute_input(struct sk_buff *skb,
18841da177e4SLinus Torvalds 			    struct fib_result *res,
18851da177e4SLinus Torvalds 			    struct in_device *in_dev,
1886e37b1e97SRoopa Prabhu 			    __be32 daddr, __be32 saddr, u32 tos,
1887e37b1e97SRoopa Prabhu 			    struct flow_keys *hkeys)
18881da177e4SLinus Torvalds {
18891da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_MULTIPATH
18900e884c78SPeter Nørlund 	if (res->fi && res->fi->fib_nhs > 1) {
18917efc0b6bSDavid Ahern 		int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
18920e884c78SPeter Nørlund 
18930e884c78SPeter Nørlund 		fib_select_multipath(res, h);
18940e884c78SPeter Nørlund 	}
18951da177e4SLinus Torvalds #endif
18961da177e4SLinus Torvalds 
18971da177e4SLinus Torvalds 	/* create a routing cache entry */
1898c6cffba4SDavid S. Miller 	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
18991da177e4SLinus Torvalds }
19001da177e4SLinus Torvalds 
19011da177e4SLinus Torvalds /*
19021da177e4SLinus Torvalds  *	NOTE. We drop all the packets that has local source
19031da177e4SLinus Torvalds  *	addresses, because every properly looped back packet
19041da177e4SLinus Torvalds  *	must have correct destination already attached by output routine.
19051da177e4SLinus Torvalds  *
19061da177e4SLinus Torvalds  *	Such approach solves two big problems:
19071da177e4SLinus Torvalds  *	1. Not simplex devices are handled properly.
19081da177e4SLinus Torvalds  *	2. IP spoofing attempts are filtered with 100% of guarantee.
1909ebc0ffaeSEric Dumazet  *	called with rcu_read_lock()
19101da177e4SLinus Torvalds  */
19111da177e4SLinus Torvalds 
19129e12bb22SAl Viro static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
19135510cdf7SDavid Ahern 			       u8 tos, struct net_device *dev,
19145510cdf7SDavid Ahern 			       struct fib_result *res)
19151da177e4SLinus Torvalds {
191696d36220SEric Dumazet 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1917e37b1e97SRoopa Prabhu 	struct flow_keys *flkeys = NULL, _flkeys;
1918e37b1e97SRoopa Prabhu 	struct net    *net = dev_net(dev);
19191b7179d3SThomas Graf 	struct ip_tunnel_info *tun_info;
1920e37b1e97SRoopa Prabhu 	int		err = -EINVAL;
192195c96174SEric Dumazet 	unsigned int	flags = 0;
19221da177e4SLinus Torvalds 	u32		itag = 0;
19231da177e4SLinus Torvalds 	struct rtable	*rth;
1924e37b1e97SRoopa Prabhu 	struct flowi4	fl4;
1925d2d68ba9SDavid S. Miller 	bool do_cache;
19261da177e4SLinus Torvalds 
19271da177e4SLinus Torvalds 	/* IP on this device is disabled. */
19281da177e4SLinus Torvalds 
19291da177e4SLinus Torvalds 	if (!in_dev)
19301da177e4SLinus Torvalds 		goto out;
19311da177e4SLinus Torvalds 
19321da177e4SLinus Torvalds 	/* Check for the most weird martians, which can be not detected
19331da177e4SLinus Torvalds 	   by fib_lookup.
19341da177e4SLinus Torvalds 	 */
19351da177e4SLinus Torvalds 
193661adedf3SJiri Benc 	tun_info = skb_tunnel_info(skb);
193746fa062aSJiri Benc 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
19381b7179d3SThomas Graf 		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
19391b7179d3SThomas Graf 	else
19401b7179d3SThomas Graf 		fl4.flowi4_tun_key.tun_id = 0;
1941f38a9eb1SThomas Graf 	skb_dst_drop(skb);
1942f38a9eb1SThomas Graf 
1943d0daebc3SThomas Graf 	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
19441da177e4SLinus Torvalds 		goto martian_source;
19451da177e4SLinus Torvalds 
19465510cdf7SDavid Ahern 	res->fi = NULL;
19475510cdf7SDavid Ahern 	res->table = NULL;
194827a954bdSAndy Walls 	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
19491da177e4SLinus Torvalds 		goto brd_input;
19501da177e4SLinus Torvalds 
19511da177e4SLinus Torvalds 	/* Accept zero addresses only to limited broadcast;
19521da177e4SLinus Torvalds 	 * I even do not know to fix it or not. Waiting for complains :-)
19531da177e4SLinus Torvalds 	 */
1954f97c1e0cSJoe Perches 	if (ipv4_is_zeronet(saddr))
19551da177e4SLinus Torvalds 		goto martian_source;
19561da177e4SLinus Torvalds 
1957d0daebc3SThomas Graf 	if (ipv4_is_zeronet(daddr))
19581da177e4SLinus Torvalds 		goto martian_destination;
19591da177e4SLinus Torvalds 
19609eb43e76SEric Dumazet 	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
19619eb43e76SEric Dumazet 	 * and call it once if daddr or/and saddr are loopback addresses
19629eb43e76SEric Dumazet 	 */
19639eb43e76SEric Dumazet 	if (ipv4_is_loopback(daddr)) {
19649eb43e76SEric Dumazet 		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
1965d0daebc3SThomas Graf 			goto martian_destination;
19669eb43e76SEric Dumazet 	} else if (ipv4_is_loopback(saddr)) {
19679eb43e76SEric Dumazet 		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
1968d0daebc3SThomas Graf 			goto martian_source;
1969d0daebc3SThomas Graf 	}
1970d0daebc3SThomas Graf 
19711da177e4SLinus Torvalds 	/*
19721da177e4SLinus Torvalds 	 *	Now we are ready to route packet.
19731da177e4SLinus Torvalds 	 */
197468a5e3ddSDavid S. Miller 	fl4.flowi4_oif = 0;
1975e0d56fddSDavid Ahern 	fl4.flowi4_iif = dev->ifindex;
197668a5e3ddSDavid S. Miller 	fl4.flowi4_mark = skb->mark;
197768a5e3ddSDavid S. Miller 	fl4.flowi4_tos = tos;
197868a5e3ddSDavid S. Miller 	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
1979b84f7878SDavid Ahern 	fl4.flowi4_flags = 0;
198068a5e3ddSDavid S. Miller 	fl4.daddr = daddr;
198168a5e3ddSDavid S. Miller 	fl4.saddr = saddr;
19828bcfd092SJulian Anastasov 	fl4.flowi4_uid = sock_net_uid(net, NULL);
1983e37b1e97SRoopa Prabhu 
19845a847a6eSDavid Ahern 	if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
1985e37b1e97SRoopa Prabhu 		flkeys = &_flkeys;
19865a847a6eSDavid Ahern 	} else {
19875a847a6eSDavid Ahern 		fl4.flowi4_proto = 0;
19885a847a6eSDavid Ahern 		fl4.fl4_sport = 0;
19895a847a6eSDavid Ahern 		fl4.fl4_dport = 0;
19905a847a6eSDavid Ahern 	}
1991e37b1e97SRoopa Prabhu 
19925510cdf7SDavid Ahern 	err = fib_lookup(net, &fl4, res, 0);
1993cd0f0b95SDuan Jiong 	if (err != 0) {
1994cd0f0b95SDuan Jiong 		if (!IN_DEV_FORWARD(in_dev))
1995cd0f0b95SDuan Jiong 			err = -EHOSTUNREACH;
19961da177e4SLinus Torvalds 		goto no_route;
1997cd0f0b95SDuan Jiong 	}
19981da177e4SLinus Torvalds 
1999*5cbf777cSXin Long 	if (res->type == RTN_BROADCAST) {
2000*5cbf777cSXin Long 		if (IN_DEV_BFORWARD(in_dev))
2001*5cbf777cSXin Long 			goto make_route;
20021da177e4SLinus Torvalds 		goto brd_input;
2003*5cbf777cSXin Long 	}
20041da177e4SLinus Torvalds 
20055510cdf7SDavid Ahern 	if (res->type == RTN_LOCAL) {
20065c04c819SMichael Smith 		err = fib_validate_source(skb, saddr, daddr, tos,
20070d5edc68SCong Wang 					  0, dev, in_dev, &itag);
2008b5f7e755SEric Dumazet 		if (err < 0)
20090d753960SDavid Ahern 			goto martian_source;
20101da177e4SLinus Torvalds 		goto local_input;
20111da177e4SLinus Torvalds 	}
20121da177e4SLinus Torvalds 
2013cd0f0b95SDuan Jiong 	if (!IN_DEV_FORWARD(in_dev)) {
2014cd0f0b95SDuan Jiong 		err = -EHOSTUNREACH;
2015251da413SDavid S. Miller 		goto no_route;
2016cd0f0b95SDuan Jiong 	}
20175510cdf7SDavid Ahern 	if (res->type != RTN_UNICAST)
20181da177e4SLinus Torvalds 		goto martian_destination;
20191da177e4SLinus Torvalds 
2020*5cbf777cSXin Long make_route:
2021e37b1e97SRoopa Prabhu 	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
20221da177e4SLinus Torvalds out:	return err;
20231da177e4SLinus Torvalds 
20241da177e4SLinus Torvalds brd_input:
20251da177e4SLinus Torvalds 	if (skb->protocol != htons(ETH_P_IP))
20261da177e4SLinus Torvalds 		goto e_inval;
20271da177e4SLinus Torvalds 
202841347dcdSDavid S. Miller 	if (!ipv4_is_zeronet(saddr)) {
20299e56e380SDavid S. Miller 		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
20309e56e380SDavid S. Miller 					  in_dev, &itag);
20311da177e4SLinus Torvalds 		if (err < 0)
20320d753960SDavid Ahern 			goto martian_source;
20331da177e4SLinus Torvalds 	}
20341da177e4SLinus Torvalds 	flags |= RTCF_BROADCAST;
20355510cdf7SDavid Ahern 	res->type = RTN_BROADCAST;
20361da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(in_brd);
20371da177e4SLinus Torvalds 
20381da177e4SLinus Torvalds local_input:
2039d2d68ba9SDavid S. Miller 	do_cache = false;
20405510cdf7SDavid Ahern 	if (res->fi) {
2041fe3edf45SDavid S. Miller 		if (!itag) {
20425510cdf7SDavid Ahern 			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
2043d2d68ba9SDavid S. Miller 			if (rt_cache_valid(rth)) {
2044c6cffba4SDavid S. Miller 				skb_dst_set_noref(skb, &rth->dst);
2045c6cffba4SDavid S. Miller 				err = 0;
2046c6cffba4SDavid S. Miller 				goto out;
2047d2d68ba9SDavid S. Miller 			}
2048d2d68ba9SDavid S. Miller 			do_cache = true;
2049d2d68ba9SDavid S. Miller 		}
2050d2d68ba9SDavid S. Miller 	}
2051d2d68ba9SDavid S. Miller 
2052f5a0aab8SDavid Ahern 	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
20535510cdf7SDavid Ahern 			   flags | RTCF_LOCAL, res->type,
2054d2d68ba9SDavid S. Miller 			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
20551da177e4SLinus Torvalds 	if (!rth)
20561da177e4SLinus Torvalds 		goto e_nobufs;
20571da177e4SLinus Torvalds 
2058d8d1f30bSChangli Gao 	rth->dst.output= ip_rt_bug;
2059cf911662SDavid S. Miller #ifdef CONFIG_IP_ROUTE_CLASSID
2060cf911662SDavid S. Miller 	rth->dst.tclassid = itag;
2061cf911662SDavid S. Miller #endif
20629917e1e8SDavid S. Miller 	rth->rt_is_input = 1;
2063571e7226SRoopa Prabhu 
2064a6254864SDuan Jiong 	RT_CACHE_STAT_INC(in_slow_tot);
20655510cdf7SDavid Ahern 	if (res->type == RTN_UNREACHABLE) {
2066d8d1f30bSChangli Gao 		rth->dst.input= ip_error;
2067d8d1f30bSChangli Gao 		rth->dst.error= -err;
20681da177e4SLinus Torvalds 		rth->rt_flags 	&= ~RTCF_LOCAL;
20691da177e4SLinus Torvalds 	}
2070efd85700SThomas Graf 
2071dcdfdf56SAlexei Starovoitov 	if (do_cache) {
20725510cdf7SDavid Ahern 		struct fib_nh *nh = &FIB_RES_NH(*res);
2073efd85700SThomas Graf 
2074efd85700SThomas Graf 		rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
2075efd85700SThomas Graf 		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
2076efd85700SThomas Graf 			WARN_ON(rth->dst.input == lwtunnel_input);
2077efd85700SThomas Graf 			rth->dst.lwtstate->orig_input = rth->dst.input;
2078efd85700SThomas Graf 			rth->dst.input = lwtunnel_input;
2079efd85700SThomas Graf 		}
2080efd85700SThomas Graf 
2081a4c2fd7fSWei Wang 		if (unlikely(!rt_cache_route(nh, rth)))
2082dcdfdf56SAlexei Starovoitov 			rt_add_uncached_list(rth);
2083dcdfdf56SAlexei Starovoitov 	}
208489aef892SDavid S. Miller 	skb_dst_set(skb, &rth->dst);
2085b23dd4feSDavid S. Miller 	err = 0;
2086ebc0ffaeSEric Dumazet 	goto out;
20871da177e4SLinus Torvalds 
20881da177e4SLinus Torvalds no_route:
20891da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(in_no_route);
20905510cdf7SDavid Ahern 	res->type = RTN_UNREACHABLE;
20915510cdf7SDavid Ahern 	res->fi = NULL;
20925510cdf7SDavid Ahern 	res->table = NULL;
20931da177e4SLinus Torvalds 	goto local_input;
20941da177e4SLinus Torvalds 
20951da177e4SLinus Torvalds 	/*
20961da177e4SLinus Torvalds 	 *	Do not cache martian addresses: they should be logged (RFC1812)
20971da177e4SLinus Torvalds 	 */
20981da177e4SLinus Torvalds martian_destination:
20991da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(in_martian_dst);
21001da177e4SLinus Torvalds #ifdef CONFIG_IP_ROUTE_VERBOSE
2101e87cc472SJoe Perches 	if (IN_DEV_LOG_MARTIANS(in_dev))
2102e87cc472SJoe Perches 		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
2103673d57e7SHarvey Harrison 				     &daddr, &saddr, dev->name);
21041da177e4SLinus Torvalds #endif
21052c2910a4SDietmar Eggemann 
21061da177e4SLinus Torvalds e_inval:
21071da177e4SLinus Torvalds 	err = -EINVAL;
2108ebc0ffaeSEric Dumazet 	goto out;
21091da177e4SLinus Torvalds 
21101da177e4SLinus Torvalds e_nobufs:
21111da177e4SLinus Torvalds 	err = -ENOBUFS;
2112ebc0ffaeSEric Dumazet 	goto out;
21131da177e4SLinus Torvalds 
21141da177e4SLinus Torvalds martian_source:
21151da177e4SLinus Torvalds 	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2116ebc0ffaeSEric Dumazet 	goto out;
21171da177e4SLinus Torvalds }
21181da177e4SLinus Torvalds 
2119c6cffba4SDavid S. Miller int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
212038a424e4SDavid Miller 			 u8 tos, struct net_device *dev)
21211da177e4SLinus Torvalds {
21225510cdf7SDavid Ahern 	struct fib_result res;
21235510cdf7SDavid Ahern 	int err;
21241da177e4SLinus Torvalds 
21256e28099dSJulian Anastasov 	tos &= IPTOS_RT_MASK;
212696d36220SEric Dumazet 	rcu_read_lock();
21275510cdf7SDavid Ahern 	err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
21285510cdf7SDavid Ahern 	rcu_read_unlock();
212996d36220SEric Dumazet 
21305510cdf7SDavid Ahern 	return err;
21315510cdf7SDavid Ahern }
21325510cdf7SDavid Ahern EXPORT_SYMBOL(ip_route_input_noref);
21335510cdf7SDavid Ahern 
21345510cdf7SDavid Ahern /* called with rcu_read_lock held */
21355510cdf7SDavid Ahern int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
21365510cdf7SDavid Ahern 		       u8 tos, struct net_device *dev, struct fib_result *res)
21375510cdf7SDavid Ahern {
21381da177e4SLinus Torvalds 	/* Multicast recognition logic is moved from route cache to here.
21391da177e4SLinus Torvalds 	   The problem was that too many Ethernet cards have broken/missing
21401da177e4SLinus Torvalds 	   hardware multicast filters :-( As result the host on multicasting
21411da177e4SLinus Torvalds 	   network acquires a lot of useless route cache entries, sort of
21421da177e4SLinus Torvalds 	   SDR messages from all the world. Now we try to get rid of them.
21431da177e4SLinus Torvalds 	   Really, provided software IP multicast filter is organized
21441da177e4SLinus Torvalds 	   reasonably (at least, hashed), it does not result in a slowdown
21451da177e4SLinus Torvalds 	   comparing with route cache reject entries.
21461da177e4SLinus Torvalds 	   Note, that multicast routers are not affected, because
21471da177e4SLinus Torvalds 	   route cache entry is created eventually.
21481da177e4SLinus Torvalds 	 */
2149f97c1e0cSJoe Perches 	if (ipv4_is_multicast(daddr)) {
215096d36220SEric Dumazet 		struct in_device *in_dev = __in_dev_get_rcu(dev);
2151e58e4159SDavid Ahern 		int our = 0;
21525510cdf7SDavid Ahern 		int err = -EINVAL;
21531da177e4SLinus Torvalds 
2154e58e4159SDavid Ahern 		if (in_dev)
2155e58e4159SDavid Ahern 			our = ip_check_mc_rcu(in_dev, daddr, saddr,
2156eddc9ec5SArnaldo Carvalho de Melo 					      ip_hdr(skb)->protocol);
2157e58e4159SDavid Ahern 
2158e58e4159SDavid Ahern 		/* check l3 master if no match yet */
2159e58e4159SDavid Ahern 		if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
2160e58e4159SDavid Ahern 			struct in_device *l3_in_dev;
2161e58e4159SDavid Ahern 
2162e58e4159SDavid Ahern 			l3_in_dev = __in_dev_get_rcu(skb->dev);
2163e58e4159SDavid Ahern 			if (l3_in_dev)
2164e58e4159SDavid Ahern 				our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
2165e58e4159SDavid Ahern 						      ip_hdr(skb)->protocol);
2166e58e4159SDavid Ahern 		}
2167e58e4159SDavid Ahern 
21681da177e4SLinus Torvalds 		if (our
21691da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE
21709d4fb27dSJoe Perches 			||
21719d4fb27dSJoe Perches 		    (!ipv4_is_local_multicast(daddr) &&
2172f97c1e0cSJoe Perches 		     IN_DEV_MFORWARD(in_dev))
21731da177e4SLinus Torvalds #endif
21741da177e4SLinus Torvalds 		   ) {
21755510cdf7SDavid Ahern 			err = ip_route_input_mc(skb, daddr, saddr,
21761da177e4SLinus Torvalds 						tos, dev, our);
2177e58e4159SDavid Ahern 		}
21785510cdf7SDavid Ahern 		return err;
21791da177e4SLinus Torvalds 	}
21805510cdf7SDavid Ahern 
21815510cdf7SDavid Ahern 	return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
21821da177e4SLinus Torvalds }
21831da177e4SLinus Torvalds 
2184ebc0ffaeSEric Dumazet /* called with rcu_read_lock() */
2185982721f3SDavid S. Miller static struct rtable *__mkroute_output(const struct fib_result *res,
21861a00fee4SDavid Miller 				       const struct flowi4 *fl4, int orig_oif,
2187f61759e6SJulian Anastasov 				       struct net_device *dev_out,
21885ada5527SDavid S. Miller 				       unsigned int flags)
21891da177e4SLinus Torvalds {
2190982721f3SDavid S. Miller 	struct fib_info *fi = res->fi;
2191f2bb4bedSDavid S. Miller 	struct fib_nh_exception *fnhe;
21925ada5527SDavid S. Miller 	struct in_device *in_dev;
2193982721f3SDavid S. Miller 	u16 type = res->type;
21945ada5527SDavid S. Miller 	struct rtable *rth;
2195c92b9655SJulian Anastasov 	bool do_cache;
21961da177e4SLinus Torvalds 
2197d0daebc3SThomas Graf 	in_dev = __in_dev_get_rcu(dev_out);
2198d0daebc3SThomas Graf 	if (!in_dev)
2199d0daebc3SThomas Graf 		return ERR_PTR(-EINVAL);
2200d0daebc3SThomas Graf 
2201d0daebc3SThomas Graf 	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
22025f02ce24SDavid Ahern 		if (ipv4_is_loopback(fl4->saddr) &&
22035f02ce24SDavid Ahern 		    !(dev_out->flags & IFF_LOOPBACK) &&
22045f02ce24SDavid Ahern 		    !netif_is_l3_master(dev_out))
22055ada5527SDavid S. Miller 			return ERR_PTR(-EINVAL);
22061da177e4SLinus Torvalds 
220768a5e3ddSDavid S. Miller 	if (ipv4_is_lbcast(fl4->daddr))
2208982721f3SDavid S. Miller 		type = RTN_BROADCAST;
220968a5e3ddSDavid S. Miller 	else if (ipv4_is_multicast(fl4->daddr))
2210982721f3SDavid S. Miller 		type = RTN_MULTICAST;
221168a5e3ddSDavid S. Miller 	else if (ipv4_is_zeronet(fl4->daddr))
22125ada5527SDavid S. Miller 		return ERR_PTR(-EINVAL);
22131da177e4SLinus Torvalds 
22141da177e4SLinus Torvalds 	if (dev_out->flags & IFF_LOOPBACK)
22151da177e4SLinus Torvalds 		flags |= RTCF_LOCAL;
22161da177e4SLinus Torvalds 
221763617421SJulian Anastasov 	do_cache = true;
2218982721f3SDavid S. Miller 	if (type == RTN_BROADCAST) {
22191da177e4SLinus Torvalds 		flags |= RTCF_BROADCAST | RTCF_LOCAL;
2220982721f3SDavid S. Miller 		fi = NULL;
2221982721f3SDavid S. Miller 	} else if (type == RTN_MULTICAST) {
22221da177e4SLinus Torvalds 		flags |= RTCF_MULTICAST | RTCF_LOCAL;
2223813b3b5dSDavid S. Miller 		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2224813b3b5dSDavid S. Miller 				     fl4->flowi4_proto))
22251da177e4SLinus Torvalds 			flags &= ~RTCF_LOCAL;
222663617421SJulian Anastasov 		else
222763617421SJulian Anastasov 			do_cache = false;
22281da177e4SLinus Torvalds 		/* If multicast route do not exist use
2229dd28d1a0SEric Dumazet 		 * default one, but do not gateway in this case.
2230dd28d1a0SEric Dumazet 		 * Yes, it is hack.
22311da177e4SLinus Torvalds 		 */
2232982721f3SDavid S. Miller 		if (fi && res->prefixlen < 4)
2233982721f3SDavid S. Miller 			fi = NULL;
2234d6d5e999SChris Friesen 	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2235d6d5e999SChris Friesen 		   (orig_oif != dev_out->ifindex)) {
2236d6d5e999SChris Friesen 		/* For local routes that require a particular output interface
2237d6d5e999SChris Friesen 		 * we do not want to cache the result.  Caching the result
2238d6d5e999SChris Friesen 		 * causes incorrect behaviour when there are multiple source
2239d6d5e999SChris Friesen 		 * addresses on the interface, the end result being that if the
2240d6d5e999SChris Friesen 		 * intended recipient is waiting on that interface for the
2241d6d5e999SChris Friesen 		 * packet he won't receive it because it will be delivered on
2242d6d5e999SChris Friesen 		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
2243d6d5e999SChris Friesen 		 * be set to the loopback interface as well.
2244d6d5e999SChris Friesen 		 */
224594720e3aSJulian Anastasov 		do_cache = false;
22461da177e4SLinus Torvalds 	}
22471da177e4SLinus Torvalds 
2248f2bb4bedSDavid S. Miller 	fnhe = NULL;
224963617421SJulian Anastasov 	do_cache &= fi != NULL;
225094720e3aSJulian Anastasov 	if (fi) {
2251d26b3a7cSEric Dumazet 		struct rtable __rcu **prth;
2252c92b9655SJulian Anastasov 		struct fib_nh *nh = &FIB_RES_NH(*res);
2253d26b3a7cSEric Dumazet 
2254c92b9655SJulian Anastasov 		fnhe = find_exception(nh, fl4->daddr);
225594720e3aSJulian Anastasov 		if (!do_cache)
225694720e3aSJulian Anastasov 			goto add;
2257deed49dfSXin Long 		if (fnhe) {
22582ffae99dSTimo Teräs 			prth = &fnhe->fnhe_rth_output;
2259deed49dfSXin Long 		} else {
2260c92b9655SJulian Anastasov 			if (unlikely(fl4->flowi4_flags &
2261c92b9655SJulian Anastasov 				     FLOWI_FLAG_KNOWN_NH &&
2262c92b9655SJulian Anastasov 				     !(nh->nh_gw &&
2263c92b9655SJulian Anastasov 				       nh->nh_scope == RT_SCOPE_LINK))) {
2264c92b9655SJulian Anastasov 				do_cache = false;
2265c92b9655SJulian Anastasov 				goto add;
2266c92b9655SJulian Anastasov 			}
2267903ceff7SChristoph Lameter 			prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
226894720e3aSJulian Anastasov 		}
2269d26b3a7cSEric Dumazet 		rth = rcu_dereference(*prth);
22709df16efaSWei Wang 		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
2271f2bb4bedSDavid S. Miller 			return rth;
2272f2bb4bedSDavid S. Miller 	}
2273c92b9655SJulian Anastasov 
2274c92b9655SJulian Anastasov add:
2275d08c4f35SDavid Ahern 	rth = rt_dst_alloc(dev_out, flags, type,
22765c1e6aa3SDavid S. Miller 			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
2277f2bb4bedSDavid S. Miller 			   IN_DEV_CONF_GET(in_dev, NOXFRM),
2278c92b9655SJulian Anastasov 			   do_cache);
22798391d07bSDimitris Michailidis 	if (!rth)
22805ada5527SDavid S. Miller 		return ERR_PTR(-ENOBUFS);
22818391d07bSDimitris Michailidis 
22829438c871SDavid Ahern 	rth->rt_iif = orig_oif;
2283b7503e0cSDavid Ahern 
22841da177e4SLinus Torvalds 	RT_CACHE_STAT_INC(out_slow_tot);
22851da177e4SLinus Torvalds 
22861da177e4SLinus Torvalds 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
22871da177e4SLinus Torvalds 		if (flags & RTCF_LOCAL &&
22881da177e4SLinus Torvalds 		    !(dev_out->flags & IFF_LOOPBACK)) {
2289d8d1f30bSChangli Gao 			rth->dst.output = ip_mc_output;
22901da177e4SLinus Torvalds 			RT_CACHE_STAT_INC(out_slow_mc);
22911da177e4SLinus Torvalds 		}
22921da177e4SLinus Torvalds #ifdef CONFIG_IP_MROUTE
2293982721f3SDavid S. Miller 		if (type == RTN_MULTICAST) {
22941da177e4SLinus Torvalds 			if (IN_DEV_MFORWARD(in_dev) &&
2295813b3b5dSDavid S. Miller 			    !ipv4_is_local_multicast(fl4->daddr)) {
2296d8d1f30bSChangli Gao 				rth->dst.input = ip_mr_input;
2297d8d1f30bSChangli Gao 				rth->dst.output = ip_mc_output;
22981da177e4SLinus Torvalds 			}
22991da177e4SLinus Torvalds 		}
23001da177e4SLinus Torvalds #endif
23011da177e4SLinus Torvalds 	}
23021da177e4SLinus Torvalds 
2303a4c2fd7fSWei Wang 	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
23049942895bSDavid Ahern 	lwtunnel_set_redirect(&rth->dst);
23051da177e4SLinus Torvalds 
23065ada5527SDavid S. Miller 	return rth;
23071da177e4SLinus Torvalds }
23081da177e4SLinus Torvalds 
23091da177e4SLinus Torvalds /*
23101da177e4SLinus Torvalds  * Major route resolver routine.
23111da177e4SLinus Torvalds  */
23121da177e4SLinus Torvalds 
23133abd1adeSDavid Ahern struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2314bf4e0a3dSNikolay Aleksandrov 					const struct sk_buff *skb)
23151da177e4SLinus Torvalds {
2316f61759e6SJulian Anastasov 	__u8 tos = RT_FL_TOS(fl4);
2317d0ea2b12SEric Dumazet 	struct fib_result res = {
2318d0ea2b12SEric Dumazet 		.type		= RTN_UNSPEC,
2319d0ea2b12SEric Dumazet 		.fi		= NULL,
2320d0ea2b12SEric Dumazet 		.table		= NULL,
2321d0ea2b12SEric Dumazet 		.tclassid	= 0,
2322d0ea2b12SEric Dumazet 	};
23235ada5527SDavid S. Miller 	struct rtable *rth;
23241da177e4SLinus Torvalds 
23251fb9489bSPavel Emelyanov 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
2326813b3b5dSDavid S. Miller 	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2327813b3b5dSDavid S. Miller 	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
232844713b67SDavid S. Miller 			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
232944713b67SDavid S. Miller 
2330010c2708SDavid S. Miller 	rcu_read_lock();
23313abd1adeSDavid Ahern 	rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
23323abd1adeSDavid Ahern 	rcu_read_unlock();
23333abd1adeSDavid Ahern 
23343abd1adeSDavid Ahern 	return rth;
23353abd1adeSDavid Ahern }
23363abd1adeSDavid Ahern EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
23373abd1adeSDavid Ahern 
23383abd1adeSDavid Ahern struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
23393abd1adeSDavid Ahern 					    struct fib_result *res,
23403abd1adeSDavid Ahern 					    const struct sk_buff *skb)
23413abd1adeSDavid Ahern {
23423abd1adeSDavid Ahern 	struct net_device *dev_out = NULL;
23433abd1adeSDavid Ahern 	int orig_oif = fl4->flowi4_oif;
23443abd1adeSDavid Ahern 	unsigned int flags = 0;
23453abd1adeSDavid Ahern 	struct rtable *rth;
23463abd1adeSDavid Ahern 	int err = -ENETUNREACH;
23473abd1adeSDavid Ahern 
2348813b3b5dSDavid S. Miller 	if (fl4->saddr) {
2349b23dd4feSDavid S. Miller 		rth = ERR_PTR(-EINVAL);
2350813b3b5dSDavid S. Miller 		if (ipv4_is_multicast(fl4->saddr) ||
2351813b3b5dSDavid S. Miller 		    ipv4_is_lbcast(fl4->saddr) ||
2352813b3b5dSDavid S. Miller 		    ipv4_is_zeronet(fl4->saddr))
23531da177e4SLinus Torvalds 			goto out;
23541da177e4SLinus Torvalds 
23551da177e4SLinus Torvalds 		/* I removed check for oif == dev_out->oif here.
23561da177e4SLinus Torvalds 		   It was wrong for two reasons:
23571ab35276SDenis V. Lunev 		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
23581ab35276SDenis V. Lunev 		      is assigned to multiple interfaces.
23591da177e4SLinus Torvalds 		   2. Moreover, we are allowed to send packets with saddr
23601da177e4SLinus Torvalds 		      of another iface. --ANK
23611da177e4SLinus Torvalds 		 */
23621da177e4SLinus Torvalds 
2363813b3b5dSDavid S. Miller 		if (fl4->flowi4_oif == 0 &&
2364813b3b5dSDavid S. Miller 		    (ipv4_is_multicast(fl4->daddr) ||
2365813b3b5dSDavid S. Miller 		     ipv4_is_lbcast(fl4->daddr))) {
2366a210d01aSJulian Anastasov 			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2367813b3b5dSDavid S. Miller 			dev_out = __ip_dev_find(net, fl4->saddr, false);
236851456b29SIan Morris 			if (!dev_out)
2369a210d01aSJulian Anastasov 				goto out;
2370a210d01aSJulian Anastasov 
23711da177e4SLinus Torvalds 			/* Special hack: user can direct multicasts
23721da177e4SLinus Torvalds 			   and limited broadcast via necessary interface
23731da177e4SLinus Torvalds 			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
23741da177e4SLinus Torvalds 			   This hack is not just for fun, it allows
23751da177e4SLinus Torvalds 			   vic,vat and friends to work.
23761da177e4SLinus Torvalds 			   They bind socket to loopback, set ttl to zero
23771da177e4SLinus Torvalds 			   and expect that it will work.
23781da177e4SLinus Torvalds 			   From the viewpoint of routing cache they are broken,
23791da177e4SLinus Torvalds 			   because we are not allowed to build multicast path
23801da177e4SLinus Torvalds 			   with loopback source addr (look, routing cache
23811da177e4SLinus Torvalds 			   cannot know, that ttl is zero, so that packet
23821da177e4SLinus Torvalds 			   will not leave this host and route is valid).
23831da177e4SLinus Torvalds 			   Luckily, this hack is good workaround.
23841da177e4SLinus Torvalds 			 */
23851da177e4SLinus Torvalds 
2386813b3b5dSDavid S. Miller 			fl4->flowi4_oif = dev_out->ifindex;
23871da177e4SLinus Torvalds 			goto make_route;
23881da177e4SLinus Torvalds 		}
2389a210d01aSJulian Anastasov 
2390813b3b5dSDavid S. Miller 		if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
2391a210d01aSJulian Anastasov 			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2392813b3b5dSDavid S. Miller 			if (!__ip_dev_find(net, fl4->saddr, false))
2393a210d01aSJulian Anastasov 				goto out;
23941da177e4SLinus Torvalds 		}
2395a210d01aSJulian Anastasov 	}
23961da177e4SLinus Torvalds 
23971da177e4SLinus Torvalds 
2398813b3b5dSDavid S. Miller 	if (fl4->flowi4_oif) {
2399813b3b5dSDavid S. Miller 		dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
2400b23dd4feSDavid S. Miller 		rth = ERR_PTR(-ENODEV);
240151456b29SIan Morris 		if (!dev_out)
24021da177e4SLinus Torvalds 			goto out;
2403e5ed6399SHerbert Xu 
2404e5ed6399SHerbert Xu 		/* RACE: Check return value of inet_select_addr instead. */
2405fc75fc83SEric Dumazet 		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
2406b23dd4feSDavid S. Miller 			rth = ERR_PTR(-ENETUNREACH);
2407fc75fc83SEric Dumazet 			goto out;
2408fc75fc83SEric Dumazet 		}
2409813b3b5dSDavid S. Miller 		if (ipv4_is_local_multicast(fl4->daddr) ||
24106a211654SAndrew Lunn 		    ipv4_is_lbcast(fl4->daddr) ||
24116a211654SAndrew Lunn 		    fl4->flowi4_proto == IPPROTO_IGMP) {
2412813b3b5dSDavid S. Miller 			if (!fl4->saddr)
2413813b3b5dSDavid S. Miller 				fl4->saddr = inet_select_addr(dev_out, 0,
24141da177e4SLinus Torvalds 							      RT_SCOPE_LINK);
24151da177e4SLinus Torvalds 			goto make_route;
24161da177e4SLinus Torvalds 		}
24170a7e2260SJiri Benc 		if (!fl4->saddr) {
2418813b3b5dSDavid S. Miller 			if (ipv4_is_multicast(fl4->daddr))
2419813b3b5dSDavid S. Miller 				fl4->saddr = inet_select_addr(dev_out, 0,
2420813b3b5dSDavid S. Miller 							      fl4->flowi4_scope);
2421813b3b5dSDavid S. Miller 			else if (!fl4->daddr)
2422813b3b5dSDavid S. Miller 				fl4->saddr = inet_select_addr(dev_out, 0,
24231da177e4SLinus Torvalds 							      RT_SCOPE_HOST);
24241da177e4SLinus Torvalds 		}
2425613d09b3SDavid Ahern 	}
24261da177e4SLinus Torvalds 
2427813b3b5dSDavid S. Miller 	if (!fl4->daddr) {
2428813b3b5dSDavid S. Miller 		fl4->daddr = fl4->saddr;
2429813b3b5dSDavid S. Miller 		if (!fl4->daddr)
2430813b3b5dSDavid S. Miller 			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
2431b40afd0eSDenis V. Lunev 		dev_out = net->loopback_dev;
24321fb9489bSPavel Emelyanov 		fl4->flowi4_oif = LOOPBACK_IFINDEX;
24333abd1adeSDavid Ahern 		res->type = RTN_LOCAL;
24341da177e4SLinus Torvalds 		flags |= RTCF_LOCAL;
24351da177e4SLinus Torvalds 		goto make_route;
24361da177e4SLinus Torvalds 	}
24371da177e4SLinus Torvalds 
24383abd1adeSDavid Ahern 	err = fib_lookup(net, fl4, res, 0);
24390315e382SNikola Forró 	if (err) {
24403abd1adeSDavid Ahern 		res->fi = NULL;
24413abd1adeSDavid Ahern 		res->table = NULL;
24426104e112SDavid Ahern 		if (fl4->flowi4_oif &&
2443e58e4159SDavid Ahern 		    (ipv4_is_multicast(fl4->daddr) ||
2444e58e4159SDavid Ahern 		    !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
24451da177e4SLinus Torvalds 			/* Apparently, routing tables are wrong. Assume,
24461da177e4SLinus Torvalds 			   that the destination is on link.
24471da177e4SLinus Torvalds 
24481da177e4SLinus Torvalds 			   WHY? DW.
24491da177e4SLinus Torvalds 			   Because we are allowed to send to iface
24501da177e4SLinus Torvalds 			   even if it has NO routes and NO assigned
24511da177e4SLinus Torvalds 			   addresses. When oif is specified, routing
24521da177e4SLinus Torvalds 			   tables are looked up with only one purpose:
24531da177e4SLinus Torvalds 			   to catch if destination is gatewayed, rather than
24541da177e4SLinus Torvalds 			   direct. Moreover, if MSG_DONTROUTE is set,
24551da177e4SLinus Torvalds 			   we send packet, ignoring both routing tables
24561da177e4SLinus Torvalds 			   and ifaddr state. --ANK
24571da177e4SLinus Torvalds 
24581da177e4SLinus Torvalds 
24591da177e4SLinus Torvalds 			   We could make it even if oif is unknown,
24601da177e4SLinus Torvalds 			   likely IPv6, but we do not.
24611da177e4SLinus Torvalds 			 */
24621da177e4SLinus Torvalds 
2463813b3b5dSDavid S. Miller 			if (fl4->saddr == 0)
2464813b3b5dSDavid S. Miller 				fl4->saddr = inet_select_addr(dev_out, 0,
24651da177e4SLinus Torvalds 							      RT_SCOPE_LINK);
24663abd1adeSDavid Ahern 			res->type = RTN_UNICAST;
24671da177e4SLinus Torvalds 			goto make_route;
24681da177e4SLinus Torvalds 		}
24690315e382SNikola Forró 		rth = ERR_PTR(err);
24701da177e4SLinus Torvalds 		goto out;
24711da177e4SLinus Torvalds 	}
24721da177e4SLinus Torvalds 
24733abd1adeSDavid Ahern 	if (res->type == RTN_LOCAL) {
2474813b3b5dSDavid S. Miller 		if (!fl4->saddr) {
24753abd1adeSDavid Ahern 			if (res->fi->fib_prefsrc)
24763abd1adeSDavid Ahern 				fl4->saddr = res->fi->fib_prefsrc;
24779fc3bbb4SJoel Sing 			else
2478813b3b5dSDavid S. Miller 				fl4->saddr = fl4->daddr;
24799fc3bbb4SJoel Sing 		}
24805f02ce24SDavid Ahern 
24815f02ce24SDavid Ahern 		/* L3 master device is the loopback for that domain */
24823abd1adeSDavid Ahern 		dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
2483b7c8487cSRobert Shearman 			net->loopback_dev;
2484839da4d9SDavid Ahern 
2485839da4d9SDavid Ahern 		/* make sure orig_oif points to fib result device even
2486839da4d9SDavid Ahern 		 * though packet rx/tx happens over loopback or l3mdev
2487839da4d9SDavid Ahern 		 */
2488839da4d9SDavid Ahern 		orig_oif = FIB_RES_OIF(*res);
2489839da4d9SDavid Ahern 
2490813b3b5dSDavid S. Miller 		fl4->flowi4_oif = dev_out->ifindex;
24911da177e4SLinus Torvalds 		flags |= RTCF_LOCAL;
24921da177e4SLinus Torvalds 		goto make_route;
24931da177e4SLinus Torvalds 	}
24941da177e4SLinus Torvalds 
24953abd1adeSDavid Ahern 	fib_select_path(net, res, fl4, skb);
24961da177e4SLinus Torvalds 
24973abd1adeSDavid Ahern 	dev_out = FIB_RES_DEV(*res);
2498813b3b5dSDavid S. Miller 	fl4->flowi4_oif = dev_out->ifindex;
24991da177e4SLinus Torvalds 
25001da177e4SLinus Torvalds 
25011da177e4SLinus Torvalds make_route:
25023abd1adeSDavid Ahern 	rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
25031da177e4SLinus Torvalds 
2504010c2708SDavid S. Miller out:
2505b23dd4feSDavid S. Miller 	return rth;
25061da177e4SLinus Torvalds }
2507d8c97a94SArnaldo Carvalho de Melo 
2508ae2688d5SJianzhao Wang static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2509ae2688d5SJianzhao Wang {
2510ae2688d5SJianzhao Wang 	return NULL;
2511ae2688d5SJianzhao Wang }
2512ae2688d5SJianzhao Wang 
2513ebb762f2SSteffen Klassert static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
2514ec831ea7SRoland Dreier {
2515618f9bc7SSteffen Klassert 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2516618f9bc7SSteffen Klassert 
2517618f9bc7SSteffen Klassert 	return mtu ? : dst->dev->mtu;
2518ec831ea7SRoland Dreier }
2519ec831ea7SRoland Dreier 
25206700c270SDavid S. Miller static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
25216700c270SDavid S. Miller 					  struct sk_buff *skb, u32 mtu)
252214e50e57SDavid S. Miller {
252314e50e57SDavid S. Miller }
252414e50e57SDavid S. Miller 
/*
 * ->redirect hook of ipv4_dst_blackhole_ops: intentionally a no-op,
 * none of the arguments are looked at.
 */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst,
				       struct sock *sk,
				       struct sk_buff *skb)
{
}
2529b587ee3bSDavid S. Miller 
25300972ddb2SHeld Bernhard static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
25310972ddb2SHeld Bernhard 					  unsigned long old)
25320972ddb2SHeld Bernhard {
25330972ddb2SHeld Bernhard 	return NULL;
25340972ddb2SHeld Bernhard }
25350972ddb2SHeld Bernhard 
253614e50e57SDavid S. Miller static struct dst_ops ipv4_dst_blackhole_ops = {
253714e50e57SDavid S. Miller 	.family			=	AF_INET,
2538ae2688d5SJianzhao Wang 	.check			=	ipv4_blackhole_dst_check,
2539ebb762f2SSteffen Klassert 	.mtu			=	ipv4_blackhole_mtu,
2540214f45c9SEric Dumazet 	.default_advmss		=	ipv4_default_advmss,
254114e50e57SDavid S. Miller 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
2542b587ee3bSDavid S. Miller 	.redirect		=	ipv4_rt_blackhole_redirect,
25430972ddb2SHeld Bernhard 	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
2544d3aaeb38SDavid S. Miller 	.neigh_lookup		=	ipv4_neigh_lookup,
254514e50e57SDavid S. Miller };
254614e50e57SDavid S. Miller 
25472774c131SDavid S. Miller struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
254814e50e57SDavid S. Miller {
25492774c131SDavid S. Miller 	struct rtable *ort = (struct rtable *) dst_orig;
2550f5b0a874SDavid S. Miller 	struct rtable *rt;
255114e50e57SDavid S. Miller 
25526c0e7284SSteffen Klassert 	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
255314e50e57SDavid S. Miller 	if (rt) {
2554d8d1f30bSChangli Gao 		struct dst_entry *new = &rt->dst;
255514e50e57SDavid S. Miller 
255614e50e57SDavid S. Miller 		new->__use = 1;
2557352e512cSHerbert Xu 		new->input = dst_discard;
2558ede2059dSEric W. Biederman 		new->output = dst_discard_out;
255914e50e57SDavid S. Miller 
25601dbe3252SWei Wang 		new->dev = net->loopback_dev;
256114e50e57SDavid S. Miller 		if (new->dev)
256214e50e57SDavid S. Miller 			dev_hold(new->dev);
256314e50e57SDavid S. Miller 
25649917e1e8SDavid S. Miller 		rt->rt_is_input = ort->rt_is_input;
25655e2b61f7SDavid S. Miller 		rt->rt_iif = ort->rt_iif;
25665943634fSDavid S. Miller 		rt->rt_pmtu = ort->rt_pmtu;
2567d52e5a7eSSabrina Dubroca 		rt->rt_mtu_locked = ort->rt_mtu_locked;
256814e50e57SDavid S. Miller 
2569ca4c3fc2Sfan.du 		rt->rt_genid = rt_genid_ipv4(net);
257014e50e57SDavid S. Miller 		rt->rt_flags = ort->rt_flags;
257114e50e57SDavid S. Miller 		rt->rt_type = ort->rt_type;
257214e50e57SDavid S. Miller 		rt->rt_gateway = ort->rt_gateway;
2573155e8336SJulian Anastasov 		rt->rt_uses_gateway = ort->rt_uses_gateway;
257414e50e57SDavid S. Miller 
2575caacf05eSDavid S. Miller 		INIT_LIST_HEAD(&rt->rt_uncached);
257614e50e57SDavid S. Miller 	}
257714e50e57SDavid S. Miller 
25782774c131SDavid S. Miller 	dst_release(dst_orig);
25792774c131SDavid S. Miller 
25802774c131SDavid S. Miller 	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
258114e50e57SDavid S. Miller }
258214e50e57SDavid S. Miller 
25839d6ec938SDavid S. Miller struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
25846f9c9615SEric Dumazet 				    const struct sock *sk)
25851da177e4SLinus Torvalds {
25869d6ec938SDavid S. Miller 	struct rtable *rt = __ip_route_output_key(net, flp4);
25871da177e4SLinus Torvalds 
2588b23dd4feSDavid S. Miller 	if (IS_ERR(rt))
2589b23dd4feSDavid S. Miller 		return rt;
25901da177e4SLinus Torvalds 
259156157872SDavid S. Miller 	if (flp4->flowi4_proto)
2592f92ee619SSteffen Klassert 		rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
25939d6ec938SDavid S. Miller 							flowi4_to_flowi(flp4),
25949d6ec938SDavid S. Miller 							sk, 0);
25951da177e4SLinus Torvalds 
2596b23dd4feSDavid S. Miller 	return rt;
25971da177e4SLinus Torvalds }
2598d8c97a94SArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(ip_route_output_flow);
2599d8c97a94SArnaldo Carvalho de Melo 
26003765d35eSDavid Ahern /* called with rcu_read_lock held */
2601404eb77eSRoopa Prabhu static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
2602404eb77eSRoopa Prabhu 			struct rtable *rt, u32 table_id, struct flowi4 *fl4,
2603404eb77eSRoopa Prabhu 			struct sk_buff *skb, u32 portid, u32 seq)
26041da177e4SLinus Torvalds {
26051da177e4SLinus Torvalds 	struct rtmsg *r;
26061da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
26072bc8ca40SSteffen Klassert 	unsigned long expires = 0;
2608f185071dSDavid S. Miller 	u32 error;
2609521f5490SJulian Anastasov 	u32 metrics[RTAX_MAX];
2610be403ea1SThomas Graf 
2611d3166e0cSDavid Ahern 	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
261251456b29SIan Morris 	if (!nlh)
261326932566SPatrick McHardy 		return -EMSGSIZE;
2614be403ea1SThomas Graf 
2615be403ea1SThomas Graf 	r = nlmsg_data(nlh);
26161da177e4SLinus Torvalds 	r->rtm_family	 = AF_INET;
26171da177e4SLinus Torvalds 	r->rtm_dst_len	= 32;
26181da177e4SLinus Torvalds 	r->rtm_src_len	= 0;
2619d6c0a4f6SDavid Miller 	r->rtm_tos	= fl4->flowi4_tos;
26208a430ed5SDavid Ahern 	r->rtm_table	= table_id < 256 ? table_id : RT_TABLE_COMPAT;
2621c36ba660SDavid Ahern 	if (nla_put_u32(skb, RTA_TABLE, table_id))
2622f3756b79SDavid S. Miller 		goto nla_put_failure;
26231da177e4SLinus Torvalds 	r->rtm_type	= rt->rt_type;
26241da177e4SLinus Torvalds 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
26251da177e4SLinus Torvalds 	r->rtm_protocol = RTPROT_UNSPEC;
26261da177e4SLinus Torvalds 	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
26271da177e4SLinus Torvalds 	if (rt->rt_flags & RTCF_NOTIFY)
26281da177e4SLinus Torvalds 		r->rtm_flags |= RTM_F_NOTIFY;
2629df4d9254SHannes Frederic Sowa 	if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2630df4d9254SHannes Frederic Sowa 		r->rtm_flags |= RTCF_DOREDIRECT;
2631be403ea1SThomas Graf 
2632930345eaSJiri Benc 	if (nla_put_in_addr(skb, RTA_DST, dst))
2633f3756b79SDavid S. Miller 		goto nla_put_failure;
26341a00fee4SDavid Miller 	if (src) {
26351da177e4SLinus Torvalds 		r->rtm_src_len = 32;
2636930345eaSJiri Benc 		if (nla_put_in_addr(skb, RTA_SRC, src))
2637f3756b79SDavid S. Miller 			goto nla_put_failure;
26381da177e4SLinus Torvalds 	}
2639f3756b79SDavid S. Miller 	if (rt->dst.dev &&
2640f3756b79SDavid S. Miller 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2641f3756b79SDavid S. Miller 		goto nla_put_failure;
2642c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
2643f3756b79SDavid S. Miller 	if (rt->dst.tclassid &&
2644f3756b79SDavid S. Miller 	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2645f3756b79SDavid S. Miller 		goto nla_put_failure;
26461da177e4SLinus Torvalds #endif
264741347dcdSDavid S. Miller 	if (!rt_is_input_route(rt) &&
2648d6c0a4f6SDavid Miller 	    fl4->saddr != src) {
2649930345eaSJiri Benc 		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
2650f3756b79SDavid S. Miller 			goto nla_put_failure;
2651f3756b79SDavid S. Miller 	}
2652155e8336SJulian Anastasov 	if (rt->rt_uses_gateway &&
2653930345eaSJiri Benc 	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
2654f3756b79SDavid S. Miller 		goto nla_put_failure;
2655be403ea1SThomas Graf 
2656ee9a8f7aSSteffen Klassert 	expires = rt->dst.expires;
2657ee9a8f7aSSteffen Klassert 	if (expires) {
2658ee9a8f7aSSteffen Klassert 		unsigned long now = jiffies;
2659ee9a8f7aSSteffen Klassert 
2660ee9a8f7aSSteffen Klassert 		if (time_before(now, expires))
2661ee9a8f7aSSteffen Klassert 			expires -= now;
2662ee9a8f7aSSteffen Klassert 		else
2663ee9a8f7aSSteffen Klassert 			expires = 0;
2664ee9a8f7aSSteffen Klassert 	}
2665ee9a8f7aSSteffen Klassert 
2666521f5490SJulian Anastasov 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2667ee9a8f7aSSteffen Klassert 	if (rt->rt_pmtu && expires)
2668521f5490SJulian Anastasov 		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2669d52e5a7eSSabrina Dubroca 	if (rt->rt_mtu_locked && expires)
2670d52e5a7eSSabrina Dubroca 		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
2671521f5490SJulian Anastasov 	if (rtnetlink_put_metrics(skb, metrics) < 0)
2672be403ea1SThomas Graf 		goto nla_put_failure;
2673be403ea1SThomas Graf 
2674b4869889SDavid Miller 	if (fl4->flowi4_mark &&
267568aaed54Sstephen hemminger 	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
2676f3756b79SDavid S. Miller 		goto nla_put_failure;
2677963bfeeeSEric Dumazet 
2678622ec2c9SLorenzo Colitti 	if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2679622ec2c9SLorenzo Colitti 	    nla_put_u32(skb, RTA_UID,
2680622ec2c9SLorenzo Colitti 			from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
2681622ec2c9SLorenzo Colitti 		goto nla_put_failure;
2682622ec2c9SLorenzo Colitti 
2683d8d1f30bSChangli Gao 	error = rt->dst.error;
2684be403ea1SThomas Graf 
2685c7537967SDavid S. Miller 	if (rt_is_input_route(rt)) {
26868caaf7b6SNicolas Dichtel #ifdef CONFIG_IP_MROUTE
26878caaf7b6SNicolas Dichtel 		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
26888caaf7b6SNicolas Dichtel 		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
26898caaf7b6SNicolas Dichtel 			int err = ipmr_get_route(net, skb,
26908caaf7b6SNicolas Dichtel 						 fl4->saddr, fl4->daddr,
26919f09eaeaSDavid Ahern 						 r, portid);
26922cf75070SNikolay Aleksandrov 
26938caaf7b6SNicolas Dichtel 			if (err <= 0) {
26948caaf7b6SNicolas Dichtel 				if (err == 0)
26958caaf7b6SNicolas Dichtel 					return 0;
26968caaf7b6SNicolas Dichtel 				goto nla_put_failure;
26978caaf7b6SNicolas Dichtel 			}
26988caaf7b6SNicolas Dichtel 		} else
26998caaf7b6SNicolas Dichtel #endif
2700404eb77eSRoopa Prabhu 			if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
2701f3756b79SDavid S. Miller 				goto nla_put_failure;
27021da177e4SLinus Torvalds 	}
27031da177e4SLinus Torvalds 
2704f185071dSDavid S. Miller 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
2705e3703b3dSThomas Graf 		goto nla_put_failure;
27061da177e4SLinus Torvalds 
2707053c095aSJohannes Berg 	nlmsg_end(skb, nlh);
2708053c095aSJohannes Berg 	return 0;
2709be403ea1SThomas Graf 
2710be403ea1SThomas Graf nla_put_failure:
271126932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
271226932566SPatrick McHardy 	return -EMSGSIZE;
27131da177e4SLinus Torvalds }
27141da177e4SLinus Torvalds 
2715404eb77eSRoopa Prabhu static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
2716404eb77eSRoopa Prabhu 						   u8 ip_proto, __be16 sport,
2717404eb77eSRoopa Prabhu 						   __be16 dport)
2718404eb77eSRoopa Prabhu {
2719404eb77eSRoopa Prabhu 	struct sk_buff *skb;
2720404eb77eSRoopa Prabhu 	struct iphdr *iph;
2721404eb77eSRoopa Prabhu 
2722404eb77eSRoopa Prabhu 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2723404eb77eSRoopa Prabhu 	if (!skb)
2724404eb77eSRoopa Prabhu 		return NULL;
2725404eb77eSRoopa Prabhu 
2726404eb77eSRoopa Prabhu 	/* Reserve room for dummy headers, this skb can pass
2727404eb77eSRoopa Prabhu 	 * through good chunk of routing engine.
2728404eb77eSRoopa Prabhu 	 */
2729404eb77eSRoopa Prabhu 	skb_reset_mac_header(skb);
2730404eb77eSRoopa Prabhu 	skb_reset_network_header(skb);
2731404eb77eSRoopa Prabhu 	skb->protocol = htons(ETH_P_IP);
2732404eb77eSRoopa Prabhu 	iph = skb_put(skb, sizeof(struct iphdr));
2733404eb77eSRoopa Prabhu 	iph->protocol = ip_proto;
2734404eb77eSRoopa Prabhu 	iph->saddr = src;
2735404eb77eSRoopa Prabhu 	iph->daddr = dst;
2736404eb77eSRoopa Prabhu 	iph->version = 0x4;
2737404eb77eSRoopa Prabhu 	iph->frag_off = 0;
2738404eb77eSRoopa Prabhu 	iph->ihl = 0x5;
2739404eb77eSRoopa Prabhu 	skb_set_transport_header(skb, skb->len);
2740404eb77eSRoopa Prabhu 
2741404eb77eSRoopa Prabhu 	switch (iph->protocol) {
2742404eb77eSRoopa Prabhu 	case IPPROTO_UDP: {
2743404eb77eSRoopa Prabhu 		struct udphdr *udph;
2744404eb77eSRoopa Prabhu 
2745404eb77eSRoopa Prabhu 		udph = skb_put_zero(skb, sizeof(struct udphdr));
2746404eb77eSRoopa Prabhu 		udph->source = sport;
2747404eb77eSRoopa Prabhu 		udph->dest = dport;
2748404eb77eSRoopa Prabhu 		udph->len = sizeof(struct udphdr);
2749404eb77eSRoopa Prabhu 		udph->check = 0;
2750404eb77eSRoopa Prabhu 		break;
2751404eb77eSRoopa Prabhu 	}
2752404eb77eSRoopa Prabhu 	case IPPROTO_TCP: {
2753404eb77eSRoopa Prabhu 		struct tcphdr *tcph;
2754404eb77eSRoopa Prabhu 
2755404eb77eSRoopa Prabhu 		tcph = skb_put_zero(skb, sizeof(struct tcphdr));
2756404eb77eSRoopa Prabhu 		tcph->source	= sport;
2757404eb77eSRoopa Prabhu 		tcph->dest	= dport;
2758404eb77eSRoopa Prabhu 		tcph->doff	= sizeof(struct tcphdr) / 4;
2759404eb77eSRoopa Prabhu 		tcph->rst = 1;
2760404eb77eSRoopa Prabhu 		tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
2761404eb77eSRoopa Prabhu 					    src, dst, 0);
2762404eb77eSRoopa Prabhu 		break;
2763404eb77eSRoopa Prabhu 	}
2764404eb77eSRoopa Prabhu 	case IPPROTO_ICMP: {
2765404eb77eSRoopa Prabhu 		struct icmphdr *icmph;
2766404eb77eSRoopa Prabhu 
2767404eb77eSRoopa Prabhu 		icmph = skb_put_zero(skb, sizeof(struct icmphdr));
2768404eb77eSRoopa Prabhu 		icmph->type = ICMP_ECHO;
2769404eb77eSRoopa Prabhu 		icmph->code = 0;
2770404eb77eSRoopa Prabhu 	}
2771404eb77eSRoopa Prabhu 	}
2772404eb77eSRoopa Prabhu 
2773404eb77eSRoopa Prabhu 	return skb;
2774404eb77eSRoopa Prabhu }
2775404eb77eSRoopa Prabhu 
2776c21ef3e3SDavid Ahern static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2777c21ef3e3SDavid Ahern 			     struct netlink_ext_ack *extack)
27781da177e4SLinus Torvalds {
27793b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
2780d889ce3bSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
2781404eb77eSRoopa Prabhu 	u32 table_id = RT_TABLE_MAIN;
2782404eb77eSRoopa Prabhu 	__be16 sport = 0, dport = 0;
27833765d35eSDavid Ahern 	struct fib_result res = {};
2784404eb77eSRoopa Prabhu 	u8 ip_proto = IPPROTO_UDP;
27851da177e4SLinus Torvalds 	struct rtable *rt = NULL;
2786404eb77eSRoopa Prabhu 	struct sk_buff *skb;
2787404eb77eSRoopa Prabhu 	struct rtmsg *rtm;
2788d6c0a4f6SDavid Miller 	struct flowi4 fl4;
27899e12bb22SAl Viro 	__be32 dst = 0;
27909e12bb22SAl Viro 	__be32 src = 0;
2791404eb77eSRoopa Prabhu 	kuid_t uid;
27929e12bb22SAl Viro 	u32 iif;
2793d889ce3bSThomas Graf 	int err;
2794963bfeeeSEric Dumazet 	int mark;
27951da177e4SLinus Torvalds 
2796fceb6435SJohannes Berg 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
2797c21ef3e3SDavid Ahern 			  extack);
2798d889ce3bSThomas Graf 	if (err < 0)
2799404eb77eSRoopa Prabhu 		return err;
2800d889ce3bSThomas Graf 
2801d889ce3bSThomas Graf 	rtm = nlmsg_data(nlh);
280267b61f6cSJiri Benc 	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
280367b61f6cSJiri Benc 	dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
2804d889ce3bSThomas Graf 	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
2805963bfeeeSEric Dumazet 	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
2806622ec2c9SLorenzo Colitti 	if (tb[RTA_UID])
2807622ec2c9SLorenzo Colitti 		uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
2808622ec2c9SLorenzo Colitti 	else
2809622ec2c9SLorenzo Colitti 		uid = (iif ? INVALID_UID : current_uid());
28101da177e4SLinus Torvalds 
2811404eb77eSRoopa Prabhu 	if (tb[RTA_IP_PROTO]) {
2812404eb77eSRoopa Prabhu 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
2813404eb77eSRoopa Prabhu 						  &ip_proto, extack);
2814404eb77eSRoopa Prabhu 		if (err)
2815404eb77eSRoopa Prabhu 			return err;
2816404eb77eSRoopa Prabhu 	}
2817bbadb9a2SFlorian Larysch 
2818404eb77eSRoopa Prabhu 	if (tb[RTA_SPORT])
2819404eb77eSRoopa Prabhu 		sport = nla_get_be16(tb[RTA_SPORT]);
2820404eb77eSRoopa Prabhu 
2821404eb77eSRoopa Prabhu 	if (tb[RTA_DPORT])
2822404eb77eSRoopa Prabhu 		dport = nla_get_be16(tb[RTA_DPORT]);
2823404eb77eSRoopa Prabhu 
2824404eb77eSRoopa Prabhu 	skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
2825404eb77eSRoopa Prabhu 	if (!skb)
2826404eb77eSRoopa Prabhu 		return -ENOBUFS;
2827bbadb9a2SFlorian Larysch 
2828d6c0a4f6SDavid Miller 	memset(&fl4, 0, sizeof(fl4));
2829d6c0a4f6SDavid Miller 	fl4.daddr = dst;
2830d6c0a4f6SDavid Miller 	fl4.saddr = src;
2831d6c0a4f6SDavid Miller 	fl4.flowi4_tos = rtm->rtm_tos;
2832d6c0a4f6SDavid Miller 	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2833d6c0a4f6SDavid Miller 	fl4.flowi4_mark = mark;
2834622ec2c9SLorenzo Colitti 	fl4.flowi4_uid = uid;
2835404eb77eSRoopa Prabhu 	if (sport)
2836404eb77eSRoopa Prabhu 		fl4.fl4_sport = sport;
2837404eb77eSRoopa Prabhu 	if (dport)
2838404eb77eSRoopa Prabhu 		fl4.fl4_dport = dport;
2839404eb77eSRoopa Prabhu 	fl4.flowi4_proto = ip_proto;
2840d6c0a4f6SDavid Miller 
28413765d35eSDavid Ahern 	rcu_read_lock();
28423765d35eSDavid Ahern 
28431da177e4SLinus Torvalds 	if (iif) {
2844d889ce3bSThomas Graf 		struct net_device *dev;
2845d889ce3bSThomas Graf 
28463765d35eSDavid Ahern 		dev = dev_get_by_index_rcu(net, iif);
284751456b29SIan Morris 		if (!dev) {
28481da177e4SLinus Torvalds 			err = -ENODEV;
2849404eb77eSRoopa Prabhu 			goto errout_rcu;
2850d889ce3bSThomas Graf 		}
2851d889ce3bSThomas Graf 
2852404eb77eSRoopa Prabhu 		fl4.flowi4_iif = iif; /* for rt_fill_info */
28531da177e4SLinus Torvalds 		skb->dev	= dev;
2854963bfeeeSEric Dumazet 		skb->mark	= mark;
28553765d35eSDavid Ahern 		err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
28563765d35eSDavid Ahern 					 dev, &res);
2857d889ce3bSThomas Graf 
2858511c3f92SEric Dumazet 		rt = skb_rtable(skb);
2859d8d1f30bSChangli Gao 		if (err == 0 && rt->dst.error)
2860d8d1f30bSChangli Gao 			err = -rt->dst.error;
28611da177e4SLinus Torvalds 	} else {
28626503a304SLorenzo Colitti 		fl4.flowi4_iif = LOOPBACK_IFINDEX;
28633765d35eSDavid Ahern 		rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
2864b23dd4feSDavid S. Miller 		err = 0;
2865b23dd4feSDavid S. Miller 		if (IS_ERR(rt))
2866b23dd4feSDavid S. Miller 			err = PTR_ERR(rt);
28672c87d63aSFlorian Westphal 		else
28682c87d63aSFlorian Westphal 			skb_dst_set(skb, &rt->dst);
28691da177e4SLinus Torvalds 	}
2870d889ce3bSThomas Graf 
28711da177e4SLinus Torvalds 	if (err)
2872404eb77eSRoopa Prabhu 		goto errout_rcu;
28731da177e4SLinus Torvalds 
28741da177e4SLinus Torvalds 	if (rtm->rtm_flags & RTM_F_NOTIFY)
28751da177e4SLinus Torvalds 		rt->rt_flags |= RTCF_NOTIFY;
28761da177e4SLinus Torvalds 
2877c36ba660SDavid Ahern 	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
287868e813aaSDavid Ahern 		table_id = res.table ? res.table->tb_id : 0;
2879c36ba660SDavid Ahern 
2880404eb77eSRoopa Prabhu 	/* reset skb for netlink reply msg */
2881404eb77eSRoopa Prabhu 	skb_trim(skb, 0);
2882404eb77eSRoopa Prabhu 	skb_reset_network_header(skb);
2883404eb77eSRoopa Prabhu 	skb_reset_transport_header(skb);
2884404eb77eSRoopa Prabhu 	skb_reset_mac_header(skb);
2885404eb77eSRoopa Prabhu 
2886bc3aae2bSRoopa Prabhu 	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
2887bc3aae2bSRoopa Prabhu 		if (!res.fi) {
2888bc3aae2bSRoopa Prabhu 			err = fib_props[res.type].error;
2889bc3aae2bSRoopa Prabhu 			if (!err)
2890bc3aae2bSRoopa Prabhu 				err = -EHOSTUNREACH;
2891404eb77eSRoopa Prabhu 			goto errout_rcu;
2892bc3aae2bSRoopa Prabhu 		}
2893b6179813SRoopa Prabhu 		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
2894b6179813SRoopa Prabhu 				    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
2895b6179813SRoopa Prabhu 				    rt->rt_type, res.prefix, res.prefixlen,
2896b6179813SRoopa Prabhu 				    fl4.flowi4_tos, res.fi, 0);
2897bc3aae2bSRoopa Prabhu 	} else {
2898404eb77eSRoopa Prabhu 		err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
2899ba52d61eSRoopa Prabhu 				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
2900bc3aae2bSRoopa Prabhu 	}
29017b46a644SDavid S. Miller 	if (err < 0)
2902404eb77eSRoopa Prabhu 		goto errout_rcu;
29031da177e4SLinus Torvalds 
29043765d35eSDavid Ahern 	rcu_read_unlock();
29053765d35eSDavid Ahern 
290615e47304SEric W. Biederman 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
29071da177e4SLinus Torvalds 
2908d889ce3bSThomas Graf errout_free:
2909404eb77eSRoopa Prabhu 	return err;
2910404eb77eSRoopa Prabhu errout_rcu:
29113765d35eSDavid Ahern 	rcu_read_unlock();
29121da177e4SLinus Torvalds 	kfree_skb(skb);
2913404eb77eSRoopa Prabhu 	goto errout_free;
29141da177e4SLinus Torvalds }
29151da177e4SLinus Torvalds 
29161da177e4SLinus Torvalds void ip_rt_multicast_event(struct in_device *in_dev)
29171da177e4SLinus Torvalds {
29184ccfe6d4SNicolas Dichtel 	rt_cache_flush(dev_net(in_dev->dev));
29191da177e4SLinus Torvalds }
29201da177e4SLinus Torvalds 
29211da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
2922082c7ca4SGao feng static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
2923082c7ca4SGao feng static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
2924082c7ca4SGao feng static int ip_rt_gc_elasticity __read_mostly	= 8;
2925773daa3cSArnd Bergmann static int ip_min_valid_pmtu __read_mostly	= IPV4_MIN_MTU;
2926082c7ca4SGao feng 
2927fe2c6338SJoe Perches static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
29288d65af78SAlexey Dobriyan 					void __user *buffer,
29291da177e4SLinus Torvalds 					size_t *lenp, loff_t *ppos)
29301da177e4SLinus Torvalds {
29315aad1de5STimo Teräs 	struct net *net = (struct net *)__ctl->extra1;
29325aad1de5STimo Teräs 
29331da177e4SLinus Torvalds 	if (write) {
29345aad1de5STimo Teräs 		rt_cache_flush(net);
29355aad1de5STimo Teräs 		fnhe_genid_bump(net);
29361da177e4SLinus Torvalds 		return 0;
29371da177e4SLinus Torvalds 	}
29381da177e4SLinus Torvalds 
29391da177e4SLinus Torvalds 	return -EINVAL;
29401da177e4SLinus Torvalds }
29411da177e4SLinus Torvalds 
2942fe2c6338SJoe Perches static struct ctl_table ipv4_route_table[] = {
29431da177e4SLinus Torvalds 	{
29441da177e4SLinus Torvalds 		.procname	= "gc_thresh",
29451da177e4SLinus Torvalds 		.data		= &ipv4_dst_ops.gc_thresh,
29461da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
29471da177e4SLinus Torvalds 		.mode		= 0644,
29486d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
29491da177e4SLinus Torvalds 	},
29501da177e4SLinus Torvalds 	{
29511da177e4SLinus Torvalds 		.procname	= "max_size",
29521da177e4SLinus Torvalds 		.data		= &ip_rt_max_size,
29531da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
29541da177e4SLinus Torvalds 		.mode		= 0644,
29556d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
29561da177e4SLinus Torvalds 	},
29571da177e4SLinus Torvalds 	{
29581da177e4SLinus Torvalds 		/*  Deprecated. Use gc_min_interval_ms */
29591da177e4SLinus Torvalds 
29601da177e4SLinus Torvalds 		.procname	= "gc_min_interval",
29611da177e4SLinus Torvalds 		.data		= &ip_rt_gc_min_interval,
29621da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
29631da177e4SLinus Torvalds 		.mode		= 0644,
29646d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec_jiffies,
29651da177e4SLinus Torvalds 	},
29661da177e4SLinus Torvalds 	{
29671da177e4SLinus Torvalds 		.procname	= "gc_min_interval_ms",
29681da177e4SLinus Torvalds 		.data		= &ip_rt_gc_min_interval,
29691da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
29701da177e4SLinus Torvalds 		.mode		= 0644,
29716d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec_ms_jiffies,
29721da177e4SLinus Torvalds 	},
29731da177e4SLinus Torvalds 	{
29741da177e4SLinus Torvalds 		.procname	= "gc_timeout",
29751da177e4SLinus Torvalds 		.data		= &ip_rt_gc_timeout,
29761da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
29771da177e4SLinus Torvalds 		.mode		= 0644,
29786d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec_jiffies,
29791da177e4SLinus Torvalds 	},
29801da177e4SLinus Torvalds 	{
29819f28a2fcSEric Dumazet 		.procname	= "gc_interval",
29829f28a2fcSEric Dumazet 		.data		= &ip_rt_gc_interval,
29839f28a2fcSEric Dumazet 		.maxlen		= sizeof(int),
29849f28a2fcSEric Dumazet 		.mode		= 0644,
29859f28a2fcSEric Dumazet 		.proc_handler	= proc_dointvec_jiffies,
29869f28a2fcSEric Dumazet 	},
29879f28a2fcSEric Dumazet 	{
29881da177e4SLinus Torvalds 		.procname	= "redirect_load",
29891da177e4SLinus Torvalds 		.data		= &ip_rt_redirect_load,
29901da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
29911da177e4SLinus Torvalds 		.mode		= 0644,
29926d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
29931da177e4SLinus Torvalds 	},
29941da177e4SLinus Torvalds 	{
29951da177e4SLinus Torvalds 		.procname	= "redirect_number",
29961da177e4SLinus Torvalds 		.data		= &ip_rt_redirect_number,
29971da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
29981da177e4SLinus Torvalds 		.mode		= 0644,
29996d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
30001da177e4SLinus Torvalds 	},
30011da177e4SLinus Torvalds 	{
30021da177e4SLinus Torvalds 		.procname	= "redirect_silence",
30031da177e4SLinus Torvalds 		.data		= &ip_rt_redirect_silence,
30041da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
30051da177e4SLinus Torvalds 		.mode		= 0644,
30066d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
30071da177e4SLinus Torvalds 	},
30081da177e4SLinus Torvalds 	{
30091da177e4SLinus Torvalds 		.procname	= "error_cost",
30101da177e4SLinus Torvalds 		.data		= &ip_rt_error_cost,
30111da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
30121da177e4SLinus Torvalds 		.mode		= 0644,
30136d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
30141da177e4SLinus Torvalds 	},
30151da177e4SLinus Torvalds 	{
30161da177e4SLinus Torvalds 		.procname	= "error_burst",
30171da177e4SLinus Torvalds 		.data		= &ip_rt_error_burst,
30181da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
30191da177e4SLinus Torvalds 		.mode		= 0644,
30206d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
30211da177e4SLinus Torvalds 	},
30221da177e4SLinus Torvalds 	{
30231da177e4SLinus Torvalds 		.procname	= "gc_elasticity",
30241da177e4SLinus Torvalds 		.data		= &ip_rt_gc_elasticity,
30251da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
30261da177e4SLinus Torvalds 		.mode		= 0644,
30276d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
30281da177e4SLinus Torvalds 	},
30291da177e4SLinus Torvalds 	{
30301da177e4SLinus Torvalds 		.procname	= "mtu_expires",
30311da177e4SLinus Torvalds 		.data		= &ip_rt_mtu_expires,
30321da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
30331da177e4SLinus Torvalds 		.mode		= 0644,
30346d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec_jiffies,
30351da177e4SLinus Torvalds 	},
30361da177e4SLinus Torvalds 	{
30371da177e4SLinus Torvalds 		.procname	= "min_pmtu",
30381da177e4SLinus Torvalds 		.data		= &ip_rt_min_pmtu,
30391da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
30401da177e4SLinus Torvalds 		.mode		= 0644,
3041c7272c2fSSabrina Dubroca 		.proc_handler	= proc_dointvec_minmax,
3042c7272c2fSSabrina Dubroca 		.extra1		= &ip_min_valid_pmtu,
30431da177e4SLinus Torvalds 	},
30441da177e4SLinus Torvalds 	{
30451da177e4SLinus Torvalds 		.procname	= "min_adv_mss",
30461da177e4SLinus Torvalds 		.data		= &ip_rt_min_advmss,
30471da177e4SLinus Torvalds 		.maxlen		= sizeof(int),
30481da177e4SLinus Torvalds 		.mode		= 0644,
30496d9f239aSAlexey Dobriyan 		.proc_handler	= proc_dointvec,
30501da177e4SLinus Torvalds 	},
3051f8572d8fSEric W. Biederman 	{ }
30521da177e4SLinus Torvalds };
305339a23e75SDenis V. Lunev 
305439a23e75SDenis V. Lunev static struct ctl_table ipv4_route_flush_table[] = {
305539a23e75SDenis V. Lunev 	{
305639a23e75SDenis V. Lunev 		.procname	= "flush",
305739a23e75SDenis V. Lunev 		.maxlen		= sizeof(int),
305839a23e75SDenis V. Lunev 		.mode		= 0200,
30596d9f239aSAlexey Dobriyan 		.proc_handler	= ipv4_sysctl_rtcache_flush,
306039a23e75SDenis V. Lunev 	},
3061f8572d8fSEric W. Biederman 	{ },
306239a23e75SDenis V. Lunev };
306339a23e75SDenis V. Lunev 
306439a23e75SDenis V. Lunev static __net_init int sysctl_route_net_init(struct net *net)
306539a23e75SDenis V. Lunev {
306639a23e75SDenis V. Lunev 	struct ctl_table *tbl;
306739a23e75SDenis V. Lunev 
306839a23e75SDenis V. Lunev 	tbl = ipv4_route_flush_table;
306909ad9bc7SOctavian Purdila 	if (!net_eq(net, &init_net)) {
307039a23e75SDenis V. Lunev 		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
307151456b29SIan Morris 		if (!tbl)
307239a23e75SDenis V. Lunev 			goto err_dup;
3073464dc801SEric W. Biederman 
3074464dc801SEric W. Biederman 		/* Don't export sysctls to unprivileged users */
3075464dc801SEric W. Biederman 		if (net->user_ns != &init_user_ns)
3076464dc801SEric W. Biederman 			tbl[0].procname = NULL;
307739a23e75SDenis V. Lunev 	}
307839a23e75SDenis V. Lunev 	tbl[0].extra1 = net;
307939a23e75SDenis V. Lunev 
3080ec8f23ceSEric W. Biederman 	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
308151456b29SIan Morris 	if (!net->ipv4.route_hdr)
308239a23e75SDenis V. Lunev 		goto err_reg;
308339a23e75SDenis V. Lunev 	return 0;
308439a23e75SDenis V. Lunev 
308539a23e75SDenis V. Lunev err_reg:
308639a23e75SDenis V. Lunev 	if (tbl != ipv4_route_flush_table)
308739a23e75SDenis V. Lunev 		kfree(tbl);
308839a23e75SDenis V. Lunev err_dup:
308939a23e75SDenis V. Lunev 	return -ENOMEM;
309039a23e75SDenis V. Lunev }
309139a23e75SDenis V. Lunev 
309239a23e75SDenis V. Lunev static __net_exit void sysctl_route_net_exit(struct net *net)
309339a23e75SDenis V. Lunev {
309439a23e75SDenis V. Lunev 	struct ctl_table *tbl;
309539a23e75SDenis V. Lunev 
309639a23e75SDenis V. Lunev 	tbl = net->ipv4.route_hdr->ctl_table_arg;
309739a23e75SDenis V. Lunev 	unregister_net_sysctl_table(net->ipv4.route_hdr);
309839a23e75SDenis V. Lunev 	BUG_ON(tbl == ipv4_route_flush_table);
309939a23e75SDenis V. Lunev 	kfree(tbl);
310039a23e75SDenis V. Lunev }
310139a23e75SDenis V. Lunev 
310239a23e75SDenis V. Lunev static __net_initdata struct pernet_operations sysctl_route_ops = {
310339a23e75SDenis V. Lunev 	.init = sysctl_route_net_init,
310439a23e75SDenis V. Lunev 	.exit = sysctl_route_net_exit,
310539a23e75SDenis V. Lunev };
31061da177e4SLinus Torvalds #endif
31071da177e4SLinus Torvalds 
31083ee94372SNeil Horman static __net_init int rt_genid_init(struct net *net)
31099f5e97e5SDenis V. Lunev {
3110ca4c3fc2Sfan.du 	atomic_set(&net->ipv4.rt_genid, 0);
31115aad1de5STimo Teräs 	atomic_set(&net->fnhe_genid, 0);
31127aed9f72SJason A. Donenfeld 	atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
31139f5e97e5SDenis V. Lunev 	return 0;
31149f5e97e5SDenis V. Lunev }
31159f5e97e5SDenis V. Lunev 
31163ee94372SNeil Horman static __net_initdata struct pernet_operations rt_genid_ops = {
31173ee94372SNeil Horman 	.init = rt_genid_init,
31189f5e97e5SDenis V. Lunev };
31199f5e97e5SDenis V. Lunev 
3120c3426b47SDavid S. Miller static int __net_init ipv4_inetpeer_init(struct net *net)
3121c3426b47SDavid S. Miller {
3122c3426b47SDavid S. Miller 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3123c3426b47SDavid S. Miller 
3124c3426b47SDavid S. Miller 	if (!bp)
3125c3426b47SDavid S. Miller 		return -ENOMEM;
3126c3426b47SDavid S. Miller 	inet_peer_base_init(bp);
3127c3426b47SDavid S. Miller 	net->ipv4.peers = bp;
3128c3426b47SDavid S. Miller 	return 0;
3129c3426b47SDavid S. Miller }
3130c3426b47SDavid S. Miller 
3131c3426b47SDavid S. Miller static void __net_exit ipv4_inetpeer_exit(struct net *net)
3132c3426b47SDavid S. Miller {
3133c3426b47SDavid S. Miller 	struct inet_peer_base *bp = net->ipv4.peers;
3134c3426b47SDavid S. Miller 
3135c3426b47SDavid S. Miller 	net->ipv4.peers = NULL;
313656a6b248SDavid S. Miller 	inetpeer_invalidate_tree(bp);
3137c3426b47SDavid S. Miller 	kfree(bp);
3138c3426b47SDavid S. Miller }
3139c3426b47SDavid S. Miller 
3140c3426b47SDavid S. Miller static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
3141c3426b47SDavid S. Miller 	.init	=	ipv4_inetpeer_init,
3142c3426b47SDavid S. Miller 	.exit	=	ipv4_inetpeer_exit,
3143c3426b47SDavid S. Miller };
31449f5e97e5SDenis V. Lunev 
#ifdef CONFIG_IP_ROUTE_CLASSID
/*
 * Per-CPU accounting area, allocated in ip_rt_init() as 256
 * struct ip_rt_acct entries per CPU.
 */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
31481da177e4SLinus Torvalds 
31491da177e4SLinus Torvalds int __init ip_rt_init(void)
31501da177e4SLinus Torvalds {
31515055c371SEric Dumazet 	int cpu;
31521da177e4SLinus Torvalds 
31536da2ec56SKees Cook 	ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
31546da2ec56SKees Cook 				  GFP_KERNEL);
315573f156a6SEric Dumazet 	if (!ip_idents)
315673f156a6SEric Dumazet 		panic("IP: failed to allocate ip_idents\n");
315773f156a6SEric Dumazet 
315873f156a6SEric Dumazet 	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
315973f156a6SEric Dumazet 
3160355b590cSEric Dumazet 	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
3161355b590cSEric Dumazet 	if (!ip_tstamps)
3162355b590cSEric Dumazet 		panic("IP: failed to allocate ip_tstamps\n");
3163355b590cSEric Dumazet 
31645055c371SEric Dumazet 	for_each_possible_cpu(cpu) {
31655055c371SEric Dumazet 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
31665055c371SEric Dumazet 
31675055c371SEric Dumazet 		INIT_LIST_HEAD(&ul->head);
31685055c371SEric Dumazet 		spin_lock_init(&ul->lock);
31695055c371SEric Dumazet 	}
3170c7066f70SPatrick McHardy #ifdef CONFIG_IP_ROUTE_CLASSID
31710dcec8c2SIngo Molnar 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
31721da177e4SLinus Torvalds 	if (!ip_rt_acct)
31731da177e4SLinus Torvalds 		panic("IP: failed to allocate ip_rt_acct\n");
31741da177e4SLinus Torvalds #endif
31751da177e4SLinus Torvalds 
3176e5d679f3SAlexey Dobriyan 	ipv4_dst_ops.kmem_cachep =
3177e5d679f3SAlexey Dobriyan 		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
317820c2df83SPaul Mundt 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
31791da177e4SLinus Torvalds 
318014e50e57SDavid S. Miller 	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
318114e50e57SDavid S. Miller 
3182fc66f95cSEric Dumazet 	if (dst_entries_init(&ipv4_dst_ops) < 0)
3183fc66f95cSEric Dumazet 		panic("IP: failed to allocate ipv4_dst_ops counter\n");
3184fc66f95cSEric Dumazet 
3185fc66f95cSEric Dumazet 	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
3186fc66f95cSEric Dumazet 		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
3187fc66f95cSEric Dumazet 
318889aef892SDavid S. Miller 	ipv4_dst_ops.gc_thresh = ~0;
318989aef892SDavid S. Miller 	ip_rt_max_size = INT_MAX;
31901da177e4SLinus Torvalds 
31911da177e4SLinus Torvalds 	devinet_init();
31921da177e4SLinus Torvalds 	ip_fib_init();
31931da177e4SLinus Torvalds 
319473b38711SDenis V. Lunev 	if (ip_rt_proc_init())
3195058bd4d2SJoe Perches 		pr_err("Unable to create route proc files\n");
31961da177e4SLinus Torvalds #ifdef CONFIG_XFRM
31971da177e4SLinus Torvalds 	xfrm_init();
3198703fb94eSSteffen Klassert 	xfrm4_init();
31991da177e4SLinus Torvalds #endif
3200394f51abSFlorian Westphal 	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
3201394f51abSFlorian Westphal 		      RTNL_FLAG_DOIT_UNLOCKED);
320263f3444fSThomas Graf 
320339a23e75SDenis V. Lunev #ifdef CONFIG_SYSCTL
320439a23e75SDenis V. Lunev 	register_pernet_subsys(&sysctl_route_ops);
320539a23e75SDenis V. Lunev #endif
32063ee94372SNeil Horman 	register_pernet_subsys(&rt_genid_ops);
3207c3426b47SDavid S. Miller 	register_pernet_subsys(&ipv4_inetpeer_ops);
32081bcdca3fSTim Hansen 	return 0;
32091da177e4SLinus Torvalds }
32101da177e4SLinus Torvalds 
3211a1bc6eb4SAl Viro #ifdef CONFIG_SYSCTL
3212eeb61f71SAl Viro /*
3213eeb61f71SAl Viro  * We really need to sanitize the damn ipv4 init order, then all
3214eeb61f71SAl Viro  * this nonsense will go away.
3215eeb61f71SAl Viro  */
3216eeb61f71SAl Viro void __init ip_static_sysctl_init(void)
3217eeb61f71SAl Viro {
32184e5ca785SEric W. Biederman 	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
3219eeb61f71SAl Viro }
3220a1bc6eb4SAl Viro #endif
3221