xref: /linux/net/ipv4/route.c (revision 7f3edee81fbd49114c28057512906f169caa0bed)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		ROUTE - implementation of the IP router.
7  *
8  * Version:	$Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
13  *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
14  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
15  *
16  * Fixes:
17  *		Alan Cox	:	Verify area fixes.
18  *		Alan Cox	:	cli() protects routing changes
19  *		Rui Oliveira	:	ICMP routing table updates
20  *		(rco@di.uminho.pt)	Routing table insertion and update
21  *		Linus Torvalds	:	Rewrote bits to be sensible
22  *		Alan Cox	:	Added BSD route gw semantics
23  *		Alan Cox	:	Super /proc >4K
24  *		Alan Cox	:	MTU in route table
25  *		Alan Cox	: 	MSS actually. Also added the window
26  *					clamper.
27  *		Sam Lantinga	:	Fixed route matching in rt_del()
28  *		Alan Cox	:	Routing cache support.
29  *		Alan Cox	:	Removed compatibility cruft.
30  *		Alan Cox	:	RTF_REJECT support.
31  *		Alan Cox	:	TCP irtt support.
32  *		Jonathan Naylor	:	Added Metric support.
33  *	Miquel van Smoorenburg	:	BSD API fixes.
34  *	Miquel van Smoorenburg	:	Metrics.
35  *		Alan Cox	:	Use __u32 properly
36  *		Alan Cox	:	Aligned routing errors more closely with BSD
37  *					our system is still very different.
38  *		Alan Cox	:	Faster /proc handling
39  *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
40  *					routing caches and better behaviour.
41  *
42  *		Olaf Erb	:	irtt wasn't being copied right.
43  *		Bjorn Ekwall	:	Kerneld route support.
44  *		Alan Cox	:	Multicast fixed (I hope)
45  * 		Pavel Krauz	:	Limited broadcast fixed
46  *		Mike McLagan	:	Routing by source
47  *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
48  *					route.c and rewritten from scratch.
49  *		Andi Kleen	:	Load-limit warning messages.
50  *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
51  *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
52  *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
53  *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
54  *		Marc Boucher	:	routing by fwmark
55  *	Robert Olsson		:	Added rt_cache statistics
56  *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
57  *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
58  * 	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
59  * 	Ilia Sotnikov		:	Removed TOS from hash calculations
60  *
61  *		This program is free software; you can redistribute it and/or
62  *		modify it under the terms of the GNU General Public License
63  *		as published by the Free Software Foundation; either version
64  *		2 of the License, or (at your option) any later version.
65  */
66 
67 #include <linux/module.h>
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
70 #include <linux/bitops.h>
71 #include <linux/types.h>
72 #include <linux/kernel.h>
73 #include <linux/mm.h>
74 #include <linux/bootmem.h>
75 #include <linux/string.h>
76 #include <linux/socket.h>
77 #include <linux/sockios.h>
78 #include <linux/errno.h>
79 #include <linux/in.h>
80 #include <linux/inet.h>
81 #include <linux/netdevice.h>
82 #include <linux/proc_fs.h>
83 #include <linux/init.h>
84 #include <linux/workqueue.h>
85 #include <linux/skbuff.h>
86 #include <linux/inetdevice.h>
87 #include <linux/igmp.h>
88 #include <linux/pkt_sched.h>
89 #include <linux/mroute.h>
90 #include <linux/netfilter_ipv4.h>
91 #include <linux/random.h>
92 #include <linux/jhash.h>
93 #include <linux/rcupdate.h>
94 #include <linux/times.h>
95 #include <net/net_namespace.h>
96 #include <net/protocol.h>
97 #include <net/ip.h>
98 #include <net/route.h>
99 #include <net/inetpeer.h>
100 #include <net/sock.h>
101 #include <net/ip_fib.h>
102 #include <net/arp.h>
103 #include <net/tcp.h>
104 #include <net/icmp.h>
105 #include <net/xfrm.h>
106 #include <net/netevent.h>
107 #include <net/rtnetlink.h>
108 #ifdef CONFIG_SYSCTL
109 #include <linux/sysctl.h>
110 #endif
111 
112 #define RT_FL_TOS(oldflp) \
113     ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
114 
115 #define IP_MAX_MTU	0xFFF0
116 
117 #define RT_GC_TIMEOUT (300*HZ)
118 
119 static int ip_rt_min_delay		= 2 * HZ;
120 static int ip_rt_max_delay		= 10 * HZ;
121 static int ip_rt_max_size;
122 static int ip_rt_gc_timeout		= RT_GC_TIMEOUT;
123 static int ip_rt_gc_interval		= 60 * HZ;
124 static int ip_rt_gc_min_interval	= HZ / 2;
125 static int ip_rt_redirect_number	= 9;
126 static int ip_rt_redirect_load		= HZ / 50;
127 static int ip_rt_redirect_silence	= ((HZ / 50) << (9 + 1));
128 static int ip_rt_error_cost		= HZ;
129 static int ip_rt_error_burst		= 5 * HZ;
130 static int ip_rt_gc_elasticity		= 8;
131 static int ip_rt_mtu_expires		= 10 * 60 * HZ;
132 static int ip_rt_min_pmtu		= 512 + 20 + 20;
133 static int ip_rt_min_advmss		= 256;
134 static int ip_rt_secret_interval	= 10 * 60 * HZ;
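/*
 * Worked example of the defaults above (illustrative only, assuming
 * HZ=1000): ip_rt_gc_min_interval is 500 jiffies (0.5s),
 * ip_rt_redirect_silence is (1000/50) << 10 = 20480 jiffies (~20s),
 * and ip_rt_min_pmtu is 552 bytes (512 payload plus, presumably, a
 * 20-byte IP and a 20-byte TCP header).  All time-valued tunables in
 * this block are expressed in jiffies.
 */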
135 static unsigned long rt_deadline;
136 
137 #define RTprint(a...)	printk(KERN_DEBUG a)
138 
139 static struct timer_list rt_flush_timer;
140 static void rt_check_expire(struct work_struct *work);
141 static DECLARE_DELAYED_WORK(expires_work, rt_check_expire);
142 static struct timer_list rt_secret_timer;
143 
144 /*
145  *	Interface to generic destination cache.
146  */
147 
148 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
149 static void		 ipv4_dst_destroy(struct dst_entry *dst);
150 static void		 ipv4_dst_ifdown(struct dst_entry *dst,
151 					 struct net_device *dev, int how);
152 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
153 static void		 ipv4_link_failure(struct sk_buff *skb);
154 static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
155 static int rt_garbage_collect(void);
156 
157 
158 static struct dst_ops ipv4_dst_ops = {
159 	.family =		AF_INET,
160 	.protocol =		__constant_htons(ETH_P_IP),
161 	.gc =			rt_garbage_collect,
162 	.check =		ipv4_dst_check,
163 	.destroy =		ipv4_dst_destroy,
164 	.ifdown =		ipv4_dst_ifdown,
165 	.negative_advice =	ipv4_negative_advice,
166 	.link_failure =		ipv4_link_failure,
167 	.update_pmtu =		ip_rt_update_pmtu,
168 	.entry_size =		sizeof(struct rtable),
169 };
170 
171 #define ECN_OR_COST(class)	TC_PRIO_##class
172 
173 const __u8 ip_tos2prio[16] = {
174 	TC_PRIO_BESTEFFORT,
175 	ECN_OR_COST(FILLER),
176 	TC_PRIO_BESTEFFORT,
177 	ECN_OR_COST(BESTEFFORT),
178 	TC_PRIO_BULK,
179 	ECN_OR_COST(BULK),
180 	TC_PRIO_BULK,
181 	ECN_OR_COST(BULK),
182 	TC_PRIO_INTERACTIVE,
183 	ECN_OR_COST(INTERACTIVE),
184 	TC_PRIO_INTERACTIVE,
185 	ECN_OR_COST(INTERACTIVE),
186 	TC_PRIO_INTERACTIVE_BULK,
187 	ECN_OR_COST(INTERACTIVE_BULK),
188 	TC_PRIO_INTERACTIVE_BULK,
189 	ECN_OR_COST(INTERACTIVE_BULK)
190 };
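/*
 * A minimal usage sketch for the table above (hedged; the call sites
 * live outside this file): the four TOS bits select an entry, so code
 * mapping a TOS byte to a queueing priority does roughly
 *
 *	skb->priority = ip_tos2prio[IPTOS_TOS(tos) >> 1];
 *
 * where every odd entry is the ECN_OR_COST() variant of the class in
 * the preceding even slot (see the rt_tos2priority() helper in
 * <net/route.h>).
 */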
191 
192 
193 /*
194  * Route cache.
195  */
196 
197 /* The locking scheme is rather straightforward:
198  *
199  * 1) Read-Copy Update protects the buckets of the central route hash.
200  * 2) Only writers remove entries, and they hold the lock
201  *    as they look at rtable reference counts.
202  * 3) Only readers acquire references to rtable entries;
203  *    they do so with atomic increments and with the
204  *    lock held.
205  */
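/*
 * A minimal sketch of the two access patterns described above, using
 * the helpers defined further down in this file:
 *
 *	reader (lockless traversal under RCU):
 *		rcu_read_lock_bh();
 *		for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
 *		     rth = rcu_dereference(rth->u.dst.rt_next))
 *			...;
 *		rcu_read_unlock_bh();
 *
 *	writer (unlink under the per-bucket lock, free via RCU):
 *		spin_lock_bh(rt_hash_lock_addr(hash));
 *		*rthp = rth->u.dst.rt_next;
 *		rt_free(rth);
 *		spin_unlock_bh(rt_hash_lock_addr(hash));
 */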
206 
207 struct rt_hash_bucket {
208 	struct rtable	*chain;
209 };
210 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
211 	defined(CONFIG_PROVE_LOCKING)
212 /*
213  * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks.
214  * The size of this table is a power of two and depends on the number of CPUs.
215  * (With lockdep, spinlock_t is quite big, so keep the size down there.)
216  */
217 #ifdef CONFIG_LOCKDEP
218 # define RT_HASH_LOCK_SZ	256
219 #else
220 # if NR_CPUS >= 32
221 #  define RT_HASH_LOCK_SZ	4096
222 # elif NR_CPUS >= 16
223 #  define RT_HASH_LOCK_SZ	2048
224 # elif NR_CPUS >= 8
225 #  define RT_HASH_LOCK_SZ	1024
226 # elif NR_CPUS >= 4
227 #  define RT_HASH_LOCK_SZ	512
228 # else
229 #  define RT_HASH_LOCK_SZ	256
230 # endif
231 #endif
232 
233 static spinlock_t	*rt_hash_locks;
234 # define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
235 # define rt_hash_lock_init()	{ \
236 		int i; \
237 		rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
238 		if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
239 		for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
240 			spin_lock_init(&rt_hash_locks[i]); \
241 		}
242 #else
243 # define rt_hash_lock_addr(slot) NULL
244 # define rt_hash_lock_init()
245 #endif
246 
247 static struct rt_hash_bucket 	*rt_hash_table;
248 static unsigned			rt_hash_mask;
249 static unsigned int		rt_hash_log;
250 static unsigned int		rt_hash_rnd;
251 
252 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
253 #define RT_CACHE_STAT_INC(field) \
254 	(__raw_get_cpu_var(rt_cache_stat).field++)
255 
256 static int rt_intern_hash(unsigned hash, struct rtable *rth,
257 				struct rtable **res);
258 
259 static unsigned int rt_hash_code(u32 daddr, u32 saddr)
260 {
261 	return (jhash_2words(daddr, saddr, rt_hash_rnd)
262 		& rt_hash_mask);
263 }
264 
265 #define rt_hash(daddr, saddr, idx) \
266 	rt_hash_code((__force u32)(__be32)(daddr),\
267 		     (__force u32)(__be32)(saddr) ^ ((idx) << 5))
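/*
 * Typical usage, as seen later in this file: hash the
 * (daddr, saddr, interface index) triple to pick a bucket, e.g.
 *
 *	hash = rt_hash(daddr, saddr, dev->ifindex);
 *	rthp = &rt_hash_table[hash].chain;
 */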
268 
269 #ifdef CONFIG_PROC_FS
270 struct rt_cache_iter_state {
271 	int bucket;
272 };
273 
274 static struct rtable *rt_cache_get_first(struct seq_file *seq)
275 {
276 	struct rtable *r = NULL;
277 	struct rt_cache_iter_state *st = seq->private;
278 
279 	for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
280 		rcu_read_lock_bh();
281 		r = rt_hash_table[st->bucket].chain;
282 		if (r)
283 			break;
284 		rcu_read_unlock_bh();
285 	}
286 	return rcu_dereference(r);
287 }
288 
289 static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
290 {
291 	struct rt_cache_iter_state *st = seq->private;
292 
293 	r = r->u.dst.rt_next;
294 	while (!r) {
295 		rcu_read_unlock_bh();
296 		if (--st->bucket < 0)
297 			break;
298 		rcu_read_lock_bh();
299 		r = rt_hash_table[st->bucket].chain;
300 	}
301 	return rcu_dereference(r);
302 }
303 
304 static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
305 {
306 	struct rtable *r = rt_cache_get_first(seq);
307 
308 	if (r)
309 		while (pos && (r = rt_cache_get_next(seq, r)))
310 			--pos;
311 	return pos ? NULL : r;
312 }
313 
314 static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
315 {
316 	return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
317 }
318 
319 static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
320 {
321 	struct rtable *r = NULL;
322 
323 	if (v == SEQ_START_TOKEN)
324 		r = rt_cache_get_first(seq);
325 	else
326 		r = rt_cache_get_next(seq, v);
327 	++*pos;
328 	return r;
329 }
330 
331 static void rt_cache_seq_stop(struct seq_file *seq, void *v)
332 {
333 	if (v && v != SEQ_START_TOKEN)
334 		rcu_read_unlock_bh();
335 }
336 
337 static int rt_cache_seq_show(struct seq_file *seq, void *v)
338 {
339 	if (v == SEQ_START_TOKEN)
340 		seq_printf(seq, "%-127s\n",
341 			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
342 			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
343 			   "HHUptod\tSpecDst");
344 	else {
345 		struct rtable *r = v;
346 		char temp[256];
347 
348 		sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t"
349 			      "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X",
350 			r->u.dst.dev ? r->u.dst.dev->name : "*",
351 			(unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
352 			r->rt_flags, atomic_read(&r->u.dst.__refcnt),
353 			r->u.dst.__use, 0, (unsigned long)r->rt_src,
354 			(dst_metric(&r->u.dst, RTAX_ADVMSS) ?
355 			     (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
356 			dst_metric(&r->u.dst, RTAX_WINDOW),
357 			(int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) +
358 			      dst_metric(&r->u.dst, RTAX_RTTVAR)),
359 			r->fl.fl4_tos,
360 			r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1,
361 			r->u.dst.hh ? (r->u.dst.hh->hh_output ==
362 				       dev_queue_xmit) : 0,
363 			r->rt_spec_dst);
364 		seq_printf(seq, "%-127s\n", temp);
365 	}
366 	return 0;
367 }
368 
369 static const struct seq_operations rt_cache_seq_ops = {
370 	.start  = rt_cache_seq_start,
371 	.next   = rt_cache_seq_next,
372 	.stop   = rt_cache_seq_stop,
373 	.show   = rt_cache_seq_show,
374 };
375 
376 static int rt_cache_seq_open(struct inode *inode, struct file *file)
377 {
378 	return seq_open_private(file, &rt_cache_seq_ops,
379 			sizeof(struct rt_cache_iter_state));
380 }
381 
382 static const struct file_operations rt_cache_seq_fops = {
383 	.owner	 = THIS_MODULE,
384 	.open	 = rt_cache_seq_open,
385 	.read	 = seq_read,
386 	.llseek	 = seq_lseek,
387 	.release = seq_release_private,
388 };
389 
390 
391 static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
392 {
393 	int cpu;
394 
395 	if (*pos == 0)
396 		return SEQ_START_TOKEN;
397 
398 	for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
399 		if (!cpu_possible(cpu))
400 			continue;
401 		*pos = cpu+1;
402 		return &per_cpu(rt_cache_stat, cpu);
403 	}
404 	return NULL;
405 }
406 
407 static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
408 {
409 	int cpu;
410 
411 	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
412 		if (!cpu_possible(cpu))
413 			continue;
414 		*pos = cpu+1;
415 		return &per_cpu(rt_cache_stat, cpu);
416 	}
417 	return NULL;
418 
419 }
420 
421 static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
422 {
423 
424 }
425 
426 static int rt_cpu_seq_show(struct seq_file *seq, void *v)
427 {
428 	struct rt_cache_stat *st = v;
429 
430 	if (v == SEQ_START_TOKEN) {
431 		seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
432 		return 0;
433 	}
434 
435 	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
436 		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
437 		   atomic_read(&ipv4_dst_ops.entries),
438 		   st->in_hit,
439 		   st->in_slow_tot,
440 		   st->in_slow_mc,
441 		   st->in_no_route,
442 		   st->in_brd,
443 		   st->in_martian_dst,
444 		   st->in_martian_src,
445 
446 		   st->out_hit,
447 		   st->out_slow_tot,
448 		   st->out_slow_mc,
449 
450 		   st->gc_total,
451 		   st->gc_ignored,
452 		   st->gc_goal_miss,
453 		   st->gc_dst_overflow,
454 		   st->in_hlist_search,
455 		   st->out_hlist_search
456 		);
457 	return 0;
458 }
459 
460 static const struct seq_operations rt_cpu_seq_ops = {
461 	.start  = rt_cpu_seq_start,
462 	.next   = rt_cpu_seq_next,
463 	.stop   = rt_cpu_seq_stop,
464 	.show   = rt_cpu_seq_show,
465 };
466 
467 
468 static int rt_cpu_seq_open(struct inode *inode, struct file *file)
469 {
470 	return seq_open(file, &rt_cpu_seq_ops);
471 }
472 
473 static const struct file_operations rt_cpu_seq_fops = {
474 	.owner	 = THIS_MODULE,
475 	.open	 = rt_cpu_seq_open,
476 	.read	 = seq_read,
477 	.llseek	 = seq_lseek,
478 	.release = seq_release,
479 };
480 
481 #endif /* CONFIG_PROC_FS */
482 
483 static __inline__ void rt_free(struct rtable *rt)
484 {
485 	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
486 }
487 
488 static __inline__ void rt_drop(struct rtable *rt)
489 {
490 	ip_rt_put(rt);
491 	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
492 }
493 
494 static __inline__ int rt_fast_clean(struct rtable *rth)
495 {
496 	/* Kill broadcast/multicast entries very aggressively, if they
497 	   collide in the hash table with more useful entries */
498 	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
499 		rth->fl.iif && rth->u.dst.rt_next;
500 }
501 
502 static __inline__ int rt_valuable(struct rtable *rth)
503 {
504 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
505 		rth->u.dst.expires;
506 }
507 
508 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
509 {
510 	unsigned long age;
511 	int ret = 0;
512 
513 	if (atomic_read(&rth->u.dst.__refcnt))
514 		goto out;
515 
516 	ret = 1;
517 	if (rth->u.dst.expires &&
518 	    time_after_eq(jiffies, rth->u.dst.expires))
519 		goto out;
520 
521 	age = jiffies - rth->u.dst.lastuse;
522 	ret = 0;
523 	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
524 	    (age <= tmo2 && rt_valuable(rth)))
525 		goto out;
526 	ret = 1;
527 out:	return ret;
528 }
529 
530 /* Bits of score are:
531  * 31: very valuable
532  * 30: not quite useless
533  * 29..0: usage counter
534  */
535 static inline u32 rt_score(struct rtable *rt)
536 {
537 	u32 score = jiffies - rt->u.dst.lastuse;
538 
539 	score = ~score & ~(3<<30);
540 
541 	if (rt_valuable(rt))
542 		score |= (1<<31);
543 
544 	if (!rt->fl.iif ||
545 	    !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
546 		score |= (1<<30);
547 
548 	return score;
549 }
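/*
 * Rough illustration of the scoring: the usage-counter part is ~age
 * masked to 30 bits, so the older an unreferenced entry is, the lower
 * its score; rt_intern_hash() below evicts the candidate with the
 * lowest score when a chain grows too long.
 */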
550 
551 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
552 {
553 	return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
554 		(fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) |
555 		(fl1->mark ^ fl2->mark) |
556 		(*(u16 *)&fl1->nl_u.ip4_u.tos ^
557 		 *(u16 *)&fl2->nl_u.ip4_u.tos) |
558 		(fl1->oif ^ fl2->oif) |
559 		(fl1->iif ^ fl2->iif)) == 0;
560 }
561 
562 static void rt_check_expire(struct work_struct *work)
563 {
564 	static unsigned int rover;
565 	unsigned int i = rover, goal;
566 	struct rtable *rth, **rthp;
567 	u64 mult;
568 
569 	mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
570 	if (ip_rt_gc_timeout > 1)
571 		do_div(mult, ip_rt_gc_timeout);
572 	goal = (unsigned int)mult;
573 	if (goal > rt_hash_mask)
574 		goal = rt_hash_mask + 1;
575 	for (; goal > 0; goal--) {
576 		unsigned long tmo = ip_rt_gc_timeout;
577 
578 		i = (i + 1) & rt_hash_mask;
579 		rthp = &rt_hash_table[i].chain;
580 
581 		if (need_resched())
582 			cond_resched();
583 
584 		if (*rthp == NULL)
585 			continue;
586 		spin_lock_bh(rt_hash_lock_addr(i));
587 		while ((rth = *rthp) != NULL) {
588 			if (rth->u.dst.expires) {
589 				/* Entry is expired even if it is in use */
590 				if (time_before_eq(jiffies, rth->u.dst.expires)) {
591 					tmo >>= 1;
592 					rthp = &rth->u.dst.rt_next;
593 					continue;
594 				}
595 			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
596 				tmo >>= 1;
597 				rthp = &rth->u.dst.rt_next;
598 				continue;
599 			}
600 
601 			/* Cleanup aged off entries. */
602 			*rthp = rth->u.dst.rt_next;
603 			rt_free(rth);
604 		}
605 		spin_unlock_bh(rt_hash_lock_addr(i));
606 	}
607 	rover = i;
608 	schedule_delayed_work(&expires_work, ip_rt_gc_interval);
609 }
610 
611 /* This can run from both BH and non-BH contexts, the latter
612  * in the case of a forced flush event.
613  */
614 static void rt_run_flush(unsigned long dummy)
615 {
616 	int i;
617 	struct rtable *rth, *next;
618 
619 	rt_deadline = 0;
620 
621 	get_random_bytes(&rt_hash_rnd, 4);
622 
623 	for (i = rt_hash_mask; i >= 0; i--) {
624 		spin_lock_bh(rt_hash_lock_addr(i));
625 		rth = rt_hash_table[i].chain;
626 		if (rth)
627 			rt_hash_table[i].chain = NULL;
628 		spin_unlock_bh(rt_hash_lock_addr(i));
629 
630 		for (; rth; rth = next) {
631 			next = rth->u.dst.rt_next;
632 			rt_free(rth);
633 		}
634 	}
635 }
636 
637 static DEFINE_SPINLOCK(rt_flush_lock);
638 
639 void rt_cache_flush(int delay)
640 {
641 	unsigned long now = jiffies;
642 	int user_mode = !in_softirq();
643 
644 	if (delay < 0)
645 		delay = ip_rt_min_delay;
646 
647 	spin_lock_bh(&rt_flush_lock);
648 
649 	if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
650 		long tmo = (long)(rt_deadline - now);
651 
652 		/* If flush timer is already running
653 		   and flush request is not immediate (delay > 0):
654 
655 		   if deadline is not achieved, prolong the timer to "delay",
656 		   otherwise fire it at deadline time.
657 		 */
658 
659 		if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay)
660 			tmo = 0;
661 
662 		if (delay > tmo)
663 			delay = tmo;
664 	}
665 
666 	if (delay <= 0) {
667 		spin_unlock_bh(&rt_flush_lock);
668 		rt_run_flush(0);
669 		return;
670 	}
671 
672 	if (rt_deadline == 0)
673 		rt_deadline = now + ip_rt_max_delay;
674 
675 	mod_timer(&rt_flush_timer, now+delay);
676 	spin_unlock_bh(&rt_flush_lock);
677 }
678 
679 static void rt_secret_rebuild(unsigned long dummy)
680 {
681 	unsigned long now = jiffies;
682 
683 	rt_cache_flush(0);
684 	mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
685 }
686 
687 /*
688    Short description of GC goals.
689 
690    We want an algorithm which keeps the routing cache at an
691    equilibrium point, where the number of aged-off entries stays
692    approximately equal to the number of newly generated ones.
693 
694    The current expiration strength is the variable "expire".
695    We try to adjust it dynamically, so that when the network is
696    idle "expire" is large enough to keep plenty of warm entries,
697    and when load increases it shrinks to limit the cache size.
698  */
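/*
 * A rough worked example for rt_garbage_collect() below (numbers purely
 * illustrative): with rt_hash_log = 10 and ip_rt_gc_elasticity = 8 the
 * "comfortable" size is 8 << 10 = 8192 entries; if the cache currently
 * holds 10000 entries the initial goal is 10000 - 8192 = 1808 entries
 * to expire, and "expire" is halved on every pass that misses its goal.
 */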
699 
700 static int rt_garbage_collect(void)
701 {
702 	static unsigned long expire = RT_GC_TIMEOUT;
703 	static unsigned long last_gc;
704 	static int rover;
705 	static int equilibrium;
706 	struct rtable *rth, **rthp;
707 	unsigned long now = jiffies;
708 	int goal;
709 
710 	/*
711 	 * Garbage collection is pretty expensive,
712 	 * do not make it too frequently.
713 	 */
714 
715 	RT_CACHE_STAT_INC(gc_total);
716 
717 	if (now - last_gc < ip_rt_gc_min_interval &&
718 	    atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
719 		RT_CACHE_STAT_INC(gc_ignored);
720 		goto out;
721 	}
722 
723 	/* Calculate the number of entries which we want to expire now. */
724 	goal = atomic_read(&ipv4_dst_ops.entries) -
725 		(ip_rt_gc_elasticity << rt_hash_log);
726 	if (goal <= 0) {
727 		if (equilibrium < ipv4_dst_ops.gc_thresh)
728 			equilibrium = ipv4_dst_ops.gc_thresh;
729 		goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
730 		if (goal > 0) {
731 			equilibrium += min_t(unsigned int, goal / 2, rt_hash_mask + 1);
732 			goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
733 		}
734 	} else {
735 		/* We are in a dangerous area. Try to reduce the cache really
736 		 * aggressively.
737 		 */
738 		goal = max_t(unsigned int, goal / 2, rt_hash_mask + 1);
739 		equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
740 	}
741 
742 	if (now - last_gc >= ip_rt_gc_min_interval)
743 		last_gc = now;
744 
745 	if (goal <= 0) {
746 		equilibrium += goal;
747 		goto work_done;
748 	}
749 
750 	do {
751 		int i, k;
752 
753 		for (i = rt_hash_mask, k = rover; i >= 0; i--) {
754 			unsigned long tmo = expire;
755 
756 			k = (k + 1) & rt_hash_mask;
757 			rthp = &rt_hash_table[k].chain;
758 			spin_lock_bh(rt_hash_lock_addr(k));
759 			while ((rth = *rthp) != NULL) {
760 				if (!rt_may_expire(rth, tmo, expire)) {
761 					tmo >>= 1;
762 					rthp = &rth->u.dst.rt_next;
763 					continue;
764 				}
765 				*rthp = rth->u.dst.rt_next;
766 				rt_free(rth);
767 				goal--;
768 			}
769 			spin_unlock_bh(rt_hash_lock_addr(k));
770 			if (goal <= 0)
771 				break;
772 		}
773 		rover = k;
774 
775 		if (goal <= 0)
776 			goto work_done;
777 
778 		/* The goal was not achieved. We stop the process if:
779 
780 		   - expire was reduced to zero (otherwise expire is halved),
781 		   - the table is not full,
782 		   - we are called from interrupt context.
783 		   The jiffies check is just a fallback/debug loop breaker;
784 		     we will not spin here for a long time in any case.
785 		 */
786 
787 		RT_CACHE_STAT_INC(gc_goal_miss);
788 
789 		if (expire == 0)
790 			break;
791 
792 		expire >>= 1;
793 #if RT_CACHE_DEBUG >= 2
794 		printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
795 				atomic_read(&ipv4_dst_ops.entries), goal, i);
796 #endif
797 
798 		if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
799 			goto out;
800 	} while (!in_softirq() && time_before_eq(jiffies, now));
801 
802 	if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
803 		goto out;
804 	if (net_ratelimit())
805 		printk(KERN_WARNING "dst cache overflow\n");
806 	RT_CACHE_STAT_INC(gc_dst_overflow);
807 	return 1;
808 
809 work_done:
810 	expire += ip_rt_gc_min_interval;
811 	if (expire > ip_rt_gc_timeout ||
812 	    atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
813 		expire = ip_rt_gc_timeout;
814 #if RT_CACHE_DEBUG >= 2
815 	printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
816 			atomic_read(&ipv4_dst_ops.entries), goal, rover);
817 #endif
818 out:	return 0;
819 }
820 
821 static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
822 {
823 	struct rtable	*rth, **rthp;
824 	unsigned long	now;
825 	struct rtable *cand, **candp;
826 	u32 		min_score;
827 	int		chain_length;
828 	int attempts = !in_softirq();
829 
830 restart:
831 	chain_length = 0;
832 	min_score = ~(u32)0;
833 	cand = NULL;
834 	candp = NULL;
835 	now = jiffies;
836 
837 	rthp = &rt_hash_table[hash].chain;
838 
839 	spin_lock_bh(rt_hash_lock_addr(hash));
840 	while ((rth = *rthp) != NULL) {
841 		if (compare_keys(&rth->fl, &rt->fl)) {
842 			/* Put it first */
843 			*rthp = rth->u.dst.rt_next;
844 			/*
845 			 * Since lookup is lockfree, the deletion
846 			 * must be visible to another weakly ordered CPU before
847 			 * the insertion at the start of the hash chain.
848 			 */
849 			rcu_assign_pointer(rth->u.dst.rt_next,
850 					   rt_hash_table[hash].chain);
851 			/*
852 			 * Since lookup is lockfree, the update writes
853 			 * must be ordered for consistency on SMP.
854 			 */
855 			rcu_assign_pointer(rt_hash_table[hash].chain, rth);
856 
857 			dst_use(&rth->u.dst, now);
858 			spin_unlock_bh(rt_hash_lock_addr(hash));
859 
860 			rt_drop(rt);
861 			*rp = rth;
862 			return 0;
863 		}
864 
865 		if (!atomic_read(&rth->u.dst.__refcnt)) {
866 			u32 score = rt_score(rth);
867 
868 			if (score <= min_score) {
869 				cand = rth;
870 				candp = rthp;
871 				min_score = score;
872 			}
873 		}
874 
875 		chain_length++;
876 
877 		rthp = &rth->u.dst.rt_next;
878 	}
879 
880 	if (cand) {
881 		/* ip_rt_gc_elasticity used to be the average chain length;
882 		 * once it is exceeded, gc becomes really aggressive.
883 		 *
884 		 * The second limit is less certain. At the moment it allows
885 		 * only 2 entries per bucket. We will see.
886 		 */
887 		if (chain_length > ip_rt_gc_elasticity) {
888 			*candp = cand->u.dst.rt_next;
889 			rt_free(cand);
890 		}
891 	}
892 
893 	/* Try to bind the route to an ARP neighbour only if it is an
894 	   output route or a unicast forwarding path.
895 	 */
896 	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
897 		int err = arp_bind_neighbour(&rt->u.dst);
898 		if (err) {
899 			spin_unlock_bh(rt_hash_lock_addr(hash));
900 
901 			if (err != -ENOBUFS) {
902 				rt_drop(rt);
903 				return err;
904 			}
905 
906 			/* Neighbour tables are full and nothing
907 			   can be released. Try to shrink the route cache;
908 			   it most likely holds some neighbour records.
909 			 */
910 			if (attempts-- > 0) {
911 				int saved_elasticity = ip_rt_gc_elasticity;
912 				int saved_int = ip_rt_gc_min_interval;
913 				ip_rt_gc_elasticity	= 1;
914 				ip_rt_gc_min_interval	= 0;
915 				rt_garbage_collect();
916 				ip_rt_gc_min_interval	= saved_int;
917 				ip_rt_gc_elasticity	= saved_elasticity;
918 				goto restart;
919 			}
920 
921 			if (net_ratelimit())
922 				printk(KERN_WARNING "Neighbour table overflow.\n");
923 			rt_drop(rt);
924 			return -ENOBUFS;
925 		}
926 	}
927 
928 	rt->u.dst.rt_next = rt_hash_table[hash].chain;
929 #if RT_CACHE_DEBUG >= 2
930 	if (rt->u.dst.rt_next) {
931 		struct rtable *trt;
932 		printk(KERN_DEBUG "rt_cache @%02x: %u.%u.%u.%u", hash,
933 		       NIPQUAD(rt->rt_dst));
934 		for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
935 			printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst));
936 		printk("\n");
937 	}
938 #endif
939 	rt_hash_table[hash].chain = rt;
940 	spin_unlock_bh(rt_hash_lock_addr(hash));
941 	*rp = rt;
942 	return 0;
943 }
944 
945 void rt_bind_peer(struct rtable *rt, int create)
946 {
947 	static DEFINE_SPINLOCK(rt_peer_lock);
948 	struct inet_peer *peer;
949 
950 	peer = inet_getpeer(rt->rt_dst, create);
951 
952 	spin_lock_bh(&rt_peer_lock);
953 	if (rt->peer == NULL) {
954 		rt->peer = peer;
955 		peer = NULL;
956 	}
957 	spin_unlock_bh(&rt_peer_lock);
958 	if (peer)
959 		inet_putpeer(peer);
960 }
961 
962 /*
963  * Peer allocation may fail only in serious out-of-memory conditions.  However
964  * we can still generate some output.
965  * Random ID selection looks a bit dangerous because we have no chance of
966  * selecting an ID that is unique within a reasonable period of time.
967  * But a broken packet identifier may be better than no packet at all.
968  */
969 static void ip_select_fb_ident(struct iphdr *iph)
970 {
971 	static DEFINE_SPINLOCK(ip_fb_id_lock);
972 	static u32 ip_fallback_id;
973 	u32 salt;
974 
975 	spin_lock_bh(&ip_fb_id_lock);
976 	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
977 	iph->id = htons(salt & 0xFFFF);
978 	ip_fallback_id = salt;
979 	spin_unlock_bh(&ip_fb_id_lock);
980 }
981 
982 void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
983 {
984 	struct rtable *rt = (struct rtable *) dst;
985 
986 	if (rt) {
987 		if (rt->peer == NULL)
988 			rt_bind_peer(rt, 1);
989 
990 		/* If peer is attached to destination, it is never detached,
991 		   so we need not grab a lock to dereference it.
992 		 */
993 		if (rt->peer) {
994 			iph->id = htons(inet_getid(rt->peer, more));
995 			return;
996 		}
997 	} else
998 		printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
999 		       __builtin_return_address(0));
1000 
1001 	ip_select_fb_ident(iph);
1002 }
1003 
1004 static void rt_del(unsigned hash, struct rtable *rt)
1005 {
1006 	struct rtable **rthp;
1007 
1008 	spin_lock_bh(rt_hash_lock_addr(hash));
1009 	ip_rt_put(rt);
1010 	for (rthp = &rt_hash_table[hash].chain; *rthp;
1011 	     rthp = &(*rthp)->u.dst.rt_next)
1012 		if (*rthp == rt) {
1013 			*rthp = rt->u.dst.rt_next;
1014 			rt_free(rt);
1015 			break;
1016 		}
1017 	spin_unlock_bh(rt_hash_lock_addr(hash));
1018 }
1019 
1020 void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1021 		    __be32 saddr, struct net_device *dev)
1022 {
1023 	int i, k;
1024 	struct in_device *in_dev = in_dev_get(dev);
1025 	struct rtable *rth, **rthp;
1026 	__be32  skeys[2] = { saddr, 0 };
1027 	int  ikeys[2] = { dev->ifindex, 0 };
1028 	struct netevent_redirect netevent;
1029 
1030 	if (!in_dev)
1031 		return;
1032 
1033 	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
1034 	    || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw))
1035 		goto reject_redirect;
1036 
1037 	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
1038 		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
1039 			goto reject_redirect;
1040 		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
1041 			goto reject_redirect;
1042 	} else {
1043 		if (inet_addr_type(new_gw) != RTN_UNICAST)
1044 			goto reject_redirect;
1045 	}
1046 
1047 	for (i = 0; i < 2; i++) {
1048 		for (k = 0; k < 2; k++) {
1049 			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
1050 
1051 			rthp=&rt_hash_table[hash].chain;
1052 
1053 			rcu_read_lock();
1054 			while ((rth = rcu_dereference(*rthp)) != NULL) {
1055 				struct rtable *rt;
1056 
1057 				if (rth->fl.fl4_dst != daddr ||
1058 				    rth->fl.fl4_src != skeys[i] ||
1059 				    rth->fl.oif != ikeys[k] ||
1060 				    rth->fl.iif != 0) {
1061 					rthp = &rth->u.dst.rt_next;
1062 					continue;
1063 				}
1064 
1065 				if (rth->rt_dst != daddr ||
1066 				    rth->rt_src != saddr ||
1067 				    rth->u.dst.error ||
1068 				    rth->rt_gateway != old_gw ||
1069 				    rth->u.dst.dev != dev)
1070 					break;
1071 
1072 				dst_hold(&rth->u.dst);
1073 				rcu_read_unlock();
1074 
1075 				rt = dst_alloc(&ipv4_dst_ops);
1076 				if (rt == NULL) {
1077 					ip_rt_put(rth);
1078 					in_dev_put(in_dev);
1079 					return;
1080 				}
1081 
1082 				/* Copy all the information. */
1083 				*rt = *rth;
1084 				INIT_RCU_HEAD(&rt->u.dst.rcu_head);
1085 				rt->u.dst.__use		= 1;
1086 				atomic_set(&rt->u.dst.__refcnt, 1);
1087 				rt->u.dst.child		= NULL;
1088 				if (rt->u.dst.dev)
1089 					dev_hold(rt->u.dst.dev);
1090 				if (rt->idev)
1091 					in_dev_hold(rt->idev);
1092 				rt->u.dst.obsolete	= 0;
1093 				rt->u.dst.lastuse	= jiffies;
1094 				rt->u.dst.path		= &rt->u.dst;
1095 				rt->u.dst.neighbour	= NULL;
1096 				rt->u.dst.hh		= NULL;
1097 				rt->u.dst.xfrm		= NULL;
1098 
1099 				rt->rt_flags		|= RTCF_REDIRECTED;
1100 
1101 				/* Gateway is different ... */
1102 				rt->rt_gateway		= new_gw;
1103 
1104 				/* Redirect received -> path was valid */
1105 				dst_confirm(&rth->u.dst);
1106 
1107 				if (rt->peer)
1108 					atomic_inc(&rt->peer->refcnt);
1109 
1110 				if (arp_bind_neighbour(&rt->u.dst) ||
1111 				    !(rt->u.dst.neighbour->nud_state &
1112 					    NUD_VALID)) {
1113 					if (rt->u.dst.neighbour)
1114 						neigh_event_send(rt->u.dst.neighbour, NULL);
1115 					ip_rt_put(rth);
1116 					rt_drop(rt);
1117 					goto do_next;
1118 				}
1119 
1120 				netevent.old = &rth->u.dst;
1121 				netevent.new = &rt->u.dst;
1122 				call_netevent_notifiers(NETEVENT_REDIRECT,
1123 							&netevent);
1124 
1125 				rt_del(hash, rth);
1126 				if (!rt_intern_hash(hash, rt, &rt))
1127 					ip_rt_put(rt);
1128 				goto do_next;
1129 			}
1130 			rcu_read_unlock();
1131 		do_next:
1132 			;
1133 		}
1134 	}
1135 	in_dev_put(in_dev);
1136 	return;
1137 
1138 reject_redirect:
1139 #ifdef CONFIG_IP_ROUTE_VERBOSE
1140 	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
1141 		printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about "
1142 			"%u.%u.%u.%u ignored.\n"
1143 			"  Advised path = %u.%u.%u.%u -> %u.%u.%u.%u\n",
1144 		       NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
1145 		       NIPQUAD(saddr), NIPQUAD(daddr));
1146 #endif
1147 	in_dev_put(in_dev);
1148 }
1149 
1150 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1151 {
1152 	struct rtable *rt = (struct rtable*)dst;
1153 	struct dst_entry *ret = dst;
1154 
1155 	if (rt) {
1156 		if (dst->obsolete) {
1157 			ip_rt_put(rt);
1158 			ret = NULL;
1159 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1160 			   rt->u.dst.expires) {
1161 			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1162 						rt->fl.oif);
1163 #if RT_CACHE_DEBUG >= 1
1164 			printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
1165 					  "%u.%u.%u.%u/%02x dropped\n",
1166 				NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
1167 #endif
1168 			rt_del(hash, rt);
1169 			ret = NULL;
1170 		}
1171 	}
1172 	return ret;
1173 }
1174 
1175 /*
1176  * Algorithm:
1177  *	1. The first ip_rt_redirect_number redirects are sent
1178  *	   with exponential backoff, then we stop sending them at all,
1179  *	   assuming that the host ignores our redirects.
1180  *	2. If we did not see packets requiring redirects
1181  *	   during ip_rt_redirect_silence, we assume that the host
1182  *	   has forgotten the redirected route and we start sending redirects again.
1183  *
1184  * This algorithm is much cheaper and more intelligent than dumb load limiting
1185  * in icmp.c.
1186  *
1187  * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
1188  * and "frag. need" (breaks PMTU discovery) in icmp.c.
1189  */
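/*
 * Worked example of the backoff (illustrative, assuming the defaults
 * above and HZ=1000): a redirect is sent only when
 * jiffies > rate_last + (ip_rt_redirect_load << rate_tokens), so the
 * gap between redirects doubles from 40ms up to roughly 5s; after
 * ip_rt_redirect_number (9) redirects we stop entirely, until about
 * 20s of silence (ip_rt_redirect_silence) resets the token count.
 */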
1190 
1191 void ip_rt_send_redirect(struct sk_buff *skb)
1192 {
1193 	struct rtable *rt = (struct rtable*)skb->dst;
1194 	struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
1195 
1196 	if (!in_dev)
1197 		return;
1198 
1199 	if (!IN_DEV_TX_REDIRECTS(in_dev))
1200 		goto out;
1201 
1202 	/* No redirected packets during ip_rt_redirect_silence;
1203 	 * reset the algorithm.
1204 	 */
1205 	if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence))
1206 		rt->u.dst.rate_tokens = 0;
1207 
1208 	/* Too many ignored redirects; do not send anything and
1209 	 * set u.dst.rate_last to the time of the last seen redirected packet.
1210 	 */
1211 	if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
1212 		rt->u.dst.rate_last = jiffies;
1213 		goto out;
1214 	}
1215 
1216 	/* Check for load limit; set rate_last to the latest sent
1217 	 * redirect.
1218 	 */
1219 	if (rt->u.dst.rate_tokens == 0 ||
1220 	    time_after(jiffies,
1221 		       (rt->u.dst.rate_last +
1222 			(ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
1223 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1224 		rt->u.dst.rate_last = jiffies;
1225 		++rt->u.dst.rate_tokens;
1226 #ifdef CONFIG_IP_ROUTE_VERBOSE
1227 		if (IN_DEV_LOG_MARTIANS(in_dev) &&
1228 		    rt->u.dst.rate_tokens == ip_rt_redirect_number &&
1229 		    net_ratelimit())
1230 			printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores "
1231 				"redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n",
1232 				NIPQUAD(rt->rt_src), rt->rt_iif,
1233 				NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
1234 #endif
1235 	}
1236 out:
1237 	in_dev_put(in_dev);
1238 }
1239 
1240 static int ip_error(struct sk_buff *skb)
1241 {
1242 	struct rtable *rt = (struct rtable*)skb->dst;
1243 	unsigned long now;
1244 	int code;
1245 
1246 	switch (rt->u.dst.error) {
1247 		case EINVAL:
1248 		default:
1249 			goto out;
1250 		case EHOSTUNREACH:
1251 			code = ICMP_HOST_UNREACH;
1252 			break;
1253 		case ENETUNREACH:
1254 			code = ICMP_NET_UNREACH;
1255 			IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
1256 			break;
1257 		case EACCES:
1258 			code = ICMP_PKT_FILTERED;
1259 			break;
1260 	}
1261 
1262 	now = jiffies;
1263 	rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
1264 	if (rt->u.dst.rate_tokens > ip_rt_error_burst)
1265 		rt->u.dst.rate_tokens = ip_rt_error_burst;
1266 	rt->u.dst.rate_last = now;
1267 	if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
1268 		rt->u.dst.rate_tokens -= ip_rt_error_cost;
1269 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1270 	}
1271 
1272 out:	kfree_skb(skb);
1273 	return 0;
1274 }
1275 
1276 /*
1277  *	The last two values are not from the RFC but
1278  *	are needed for AMPRnet AX.25 paths.
1279  */
1280 
1281 static const unsigned short mtu_plateau[] =
1282 {32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
1283 
1284 static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
1285 {
1286 	int i;
1287 
1288 	for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
1289 		if (old_mtu > mtu_plateau[i])
1290 			return mtu_plateau[i];
1291 	return 68;
1292 }
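/*
 * Example: for an oversized datagram with old_mtu = 1500, guess_mtu()
 * returns the next lower plateau, 1492; for old_mtu of 128 or less it
 * falls back to the minimum of 68.
 */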
1293 
1294 unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
1295 {
1296 	int i;
1297 	unsigned short old_mtu = ntohs(iph->tot_len);
1298 	struct rtable *rth;
1299 	__be32  skeys[2] = { iph->saddr, 0, };
1300 	__be32  daddr = iph->daddr;
1301 	unsigned short est_mtu = 0;
1302 
1303 	if (ipv4_config.no_pmtu_disc)
1304 		return 0;
1305 
1306 	for (i = 0; i < 2; i++) {
1307 		unsigned hash = rt_hash(daddr, skeys[i], 0);
1308 
1309 		rcu_read_lock();
1310 		for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1311 		     rth = rcu_dereference(rth->u.dst.rt_next)) {
1312 			if (rth->fl.fl4_dst == daddr &&
1313 			    rth->fl.fl4_src == skeys[i] &&
1314 			    rth->rt_dst  == daddr &&
1315 			    rth->rt_src  == iph->saddr &&
1316 			    rth->fl.iif == 0 &&
1317 			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
1318 				unsigned short mtu = new_mtu;
1319 
1320 				if (new_mtu < 68 || new_mtu >= old_mtu) {
1321 
1322 					/* BSD 4.2 compatibility hack :-( */
1323 					if (mtu == 0 &&
1324 					    old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] &&
1325 					    old_mtu >= 68 + (iph->ihl << 2))
1326 						old_mtu -= iph->ihl << 2;
1327 
1328 					mtu = guess_mtu(old_mtu);
1329 				}
1330 				if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) {
1331 					if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) {
1332 						dst_confirm(&rth->u.dst);
1333 						if (mtu < ip_rt_min_pmtu) {
1334 							mtu = ip_rt_min_pmtu;
1335 							rth->u.dst.metrics[RTAX_LOCK-1] |=
1336 								(1 << RTAX_MTU);
1337 						}
1338 						rth->u.dst.metrics[RTAX_MTU-1] = mtu;
1339 						dst_set_expires(&rth->u.dst,
1340 							ip_rt_mtu_expires);
1341 					}
1342 					est_mtu = mtu;
1343 				}
1344 			}
1345 		}
1346 		rcu_read_unlock();
1347 	}
1348 	return est_mtu ? : new_mtu;
1349 }
1350 
1351 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1352 {
1353 	if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 &&
1354 	    !(dst_metric_locked(dst, RTAX_MTU))) {
1355 		if (mtu < ip_rt_min_pmtu) {
1356 			mtu = ip_rt_min_pmtu;
1357 			dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
1358 		}
1359 		dst->metrics[RTAX_MTU-1] = mtu;
1360 		dst_set_expires(dst, ip_rt_mtu_expires);
1361 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1362 	}
1363 }
1364 
1365 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1366 {
1367 	return NULL;
1368 }
1369 
1370 static void ipv4_dst_destroy(struct dst_entry *dst)
1371 {
1372 	struct rtable *rt = (struct rtable *) dst;
1373 	struct inet_peer *peer = rt->peer;
1374 	struct in_device *idev = rt->idev;
1375 
1376 	if (peer) {
1377 		rt->peer = NULL;
1378 		inet_putpeer(peer);
1379 	}
1380 
1381 	if (idev) {
1382 		rt->idev = NULL;
1383 		in_dev_put(idev);
1384 	}
1385 }
1386 
1387 static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
1388 			    int how)
1389 {
1390 	struct rtable *rt = (struct rtable *) dst;
1391 	struct in_device *idev = rt->idev;
1392 	if (dev != init_net.loopback_dev && idev && idev->dev == dev) {
1393 		struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev);
1394 		if (loopback_idev) {
1395 			rt->idev = loopback_idev;
1396 			in_dev_put(idev);
1397 		}
1398 	}
1399 }
1400 
1401 static void ipv4_link_failure(struct sk_buff *skb)
1402 {
1403 	struct rtable *rt;
1404 
1405 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1406 
1407 	rt = (struct rtable *) skb->dst;
1408 	if (rt)
1409 		dst_set_expires(&rt->u.dst, 0);
1410 }
1411 
1412 static int ip_rt_bug(struct sk_buff *skb)
1413 {
1414 	printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
1415 		NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr),
1416 		skb->dev ? skb->dev->name : "?");
1417 	kfree_skb(skb);
1418 	return 0;
1419 }
1420 
1421 /*
1422    We do not cache the source address of the outgoing interface,
1423    because it is used only by the IP RR, TS and SRR options,
1424    so it is out of the fast path.
1425 
1426    BTW remember: "addr" is allowed to be unaligned
1427    in IP options!
1428  */
1429 
1430 void ip_rt_get_source(u8 *addr, struct rtable *rt)
1431 {
1432 	__be32 src;
1433 	struct fib_result res;
1434 
1435 	if (rt->fl.iif == 0)
1436 		src = rt->rt_src;
1437 	else if (fib_lookup(&rt->fl, &res) == 0) {
1438 		src = FIB_RES_PREFSRC(res);
1439 		fib_res_put(&res);
1440 	} else
1441 		src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
1442 					RT_SCOPE_UNIVERSE);
1443 	memcpy(addr, &src, 4);
1444 }
1445 
1446 #ifdef CONFIG_NET_CLS_ROUTE
1447 static void set_class_tag(struct rtable *rt, u32 tag)
1448 {
1449 	if (!(rt->u.dst.tclassid & 0xFFFF))
1450 		rt->u.dst.tclassid |= tag & 0xFFFF;
1451 	if (!(rt->u.dst.tclassid & 0xFFFF0000))
1452 		rt->u.dst.tclassid |= tag & 0xFFFF0000;
1453 }
1454 #endif
1455 
1456 static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1457 {
1458 	struct fib_info *fi = res->fi;
1459 
1460 	if (fi) {
1461 		if (FIB_RES_GW(*res) &&
1462 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1463 			rt->rt_gateway = FIB_RES_GW(*res);
1464 		memcpy(rt->u.dst.metrics, fi->fib_metrics,
1465 		       sizeof(rt->u.dst.metrics));
1466 		if (fi->fib_mtu == 0) {
1467 			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
1468 			if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) &&
1469 			    rt->rt_gateway != rt->rt_dst &&
1470 			    rt->u.dst.dev->mtu > 576)
1471 				rt->u.dst.metrics[RTAX_MTU-1] = 576;
1472 		}
1473 #ifdef CONFIG_NET_CLS_ROUTE
1474 		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
1475 #endif
1476 	} else
1477 		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
1478 
1479 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1480 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
1481 	if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
1482 		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
1483 	if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
1484 		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
1485 				       ip_rt_min_advmss);
1486 	if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40)
1487 		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
1488 
1489 #ifdef CONFIG_NET_CLS_ROUTE
1490 #ifdef CONFIG_IP_MULTIPLE_TABLES
1491 	set_class_tag(rt, fib_rules_tclass(res));
1492 #endif
1493 	set_class_tag(rt, itag);
1494 #endif
1495 	rt->rt_type = res->type;
1496 }
1497 
1498 static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1499 				u8 tos, struct net_device *dev, int our)
1500 {
1501 	unsigned hash;
1502 	struct rtable *rth;
1503 	__be32 spec_dst;
1504 	struct in_device *in_dev = in_dev_get(dev);
1505 	u32 itag = 0;
1506 
1507 	/* Primary sanity checks. */
1508 
1509 	if (in_dev == NULL)
1510 		return -EINVAL;
1511 
1512 	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) ||
1513 	    skb->protocol != htons(ETH_P_IP))
1514 		goto e_inval;
1515 
1516 	if (ZERONET(saddr)) {
1517 		if (!LOCAL_MCAST(daddr))
1518 			goto e_inval;
1519 		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1520 	} else if (fib_validate_source(saddr, 0, tos, 0,
1521 					dev, &spec_dst, &itag) < 0)
1522 		goto e_inval;
1523 
1524 	rth = dst_alloc(&ipv4_dst_ops);
1525 	if (!rth)
1526 		goto e_nobufs;
1527 
1528 	rth->u.dst.output= ip_rt_bug;
1529 
1530 	atomic_set(&rth->u.dst.__refcnt, 1);
1531 	rth->u.dst.flags= DST_HOST;
1532 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1533 		rth->u.dst.flags |= DST_NOPOLICY;
1534 	rth->fl.fl4_dst	= daddr;
1535 	rth->rt_dst	= daddr;
1536 	rth->fl.fl4_tos	= tos;
1537 	rth->fl.mark    = skb->mark;
1538 	rth->fl.fl4_src	= saddr;
1539 	rth->rt_src	= saddr;
1540 #ifdef CONFIG_NET_CLS_ROUTE
1541 	rth->u.dst.tclassid = itag;
1542 #endif
1543 	rth->rt_iif	=
1544 	rth->fl.iif	= dev->ifindex;
1545 	rth->u.dst.dev	= init_net.loopback_dev;
1546 	dev_hold(rth->u.dst.dev);
1547 	rth->idev	= in_dev_get(rth->u.dst.dev);
1548 	rth->fl.oif	= 0;
1549 	rth->rt_gateway	= daddr;
1550 	rth->rt_spec_dst= spec_dst;
1551 	rth->rt_type	= RTN_MULTICAST;
1552 	rth->rt_flags	= RTCF_MULTICAST;
1553 	if (our) {
1554 		rth->u.dst.input= ip_local_deliver;
1555 		rth->rt_flags |= RTCF_LOCAL;
1556 	}
1557 
1558 #ifdef CONFIG_IP_MROUTE
1559 	if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
1560 		rth->u.dst.input = ip_mr_input;
1561 #endif
1562 	RT_CACHE_STAT_INC(in_slow_mc);
1563 
1564 	in_dev_put(in_dev);
1565 	hash = rt_hash(daddr, saddr, dev->ifindex);
1566 	return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);
1567 
1568 e_nobufs:
1569 	in_dev_put(in_dev);
1570 	return -ENOBUFS;
1571 
1572 e_inval:
1573 	in_dev_put(in_dev);
1574 	return -EINVAL;
1575 }
1576 
1577 
1578 static void ip_handle_martian_source(struct net_device *dev,
1579 				     struct in_device *in_dev,
1580 				     struct sk_buff *skb,
1581 				     __be32 daddr,
1582 				     __be32 saddr)
1583 {
1584 	RT_CACHE_STAT_INC(in_martian_src);
1585 #ifdef CONFIG_IP_ROUTE_VERBOSE
1586 	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1587 		/*
1588 		 *	RFC1812 recommendation: if the source is martian,
1589 		 *	the only hint is the MAC header.
1590 		 */
1591 		printk(KERN_WARNING "martian source %u.%u.%u.%u from "
1592 			"%u.%u.%u.%u, on dev %s\n",
1593 			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
1594 		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
1595 			int i;
1596 			const unsigned char *p = skb_mac_header(skb);
1597 			printk(KERN_WARNING "ll header: ");
1598 			for (i = 0; i < dev->hard_header_len; i++, p++) {
1599 				printk("%02x", *p);
1600 				if (i < (dev->hard_header_len - 1))
1601 					printk(":");
1602 			}
1603 			printk("\n");
1604 		}
1605 	}
1606 #endif
1607 }
1608 
1609 static inline int __mkroute_input(struct sk_buff *skb,
1610 				  struct fib_result* res,
1611 				  struct in_device *in_dev,
1612 				  __be32 daddr, __be32 saddr, u32 tos,
1613 				  struct rtable **result)
1614 {
1615 
1616 	struct rtable *rth;
1617 	int err;
1618 	struct in_device *out_dev;
1619 	unsigned flags = 0;
1620 	__be32 spec_dst;
1621 	u32 itag;
1622 
1623 	/* get a working reference to the output device */
1624 	out_dev = in_dev_get(FIB_RES_DEV(*res));
1625 	if (out_dev == NULL) {
1626 		if (net_ratelimit())
1627 			printk(KERN_CRIT "Bug in ip_route_input" \
1628 			       "_slow(). Please, report\n");
1629 		return -EINVAL;
1630 	}
1631 
1632 
1633 	err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
1634 				  in_dev->dev, &spec_dst, &itag);
1635 	if (err < 0) {
1636 		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1637 					 saddr);
1638 
1639 		err = -EINVAL;
1640 		goto cleanup;
1641 	}
1642 
1643 	if (err)
1644 		flags |= RTCF_DIRECTSRC;
1645 
1646 	if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
1647 	    (IN_DEV_SHARED_MEDIA(out_dev) ||
1648 	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1649 		flags |= RTCF_DOREDIRECT;
1650 
1651 	if (skb->protocol != htons(ETH_P_IP)) {
1652 		/* Not IP (i.e. ARP). Do not create route, if it is
1653 		 * invalid for proxy arp. DNAT routes are always valid.
1654 		 */
1655 		if (out_dev == in_dev && !(flags & RTCF_DNAT)) {
1656 			err = -EINVAL;
1657 			goto cleanup;
1658 		}
1659 	}
1660 
1661 
1662 	rth = dst_alloc(&ipv4_dst_ops);
1663 	if (!rth) {
1664 		err = -ENOBUFS;
1665 		goto cleanup;
1666 	}
1667 
1668 	atomic_set(&rth->u.dst.__refcnt, 1);
1669 	rth->u.dst.flags= DST_HOST;
1670 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1671 		rth->u.dst.flags |= DST_NOPOLICY;
1672 	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
1673 		rth->u.dst.flags |= DST_NOXFRM;
1674 	rth->fl.fl4_dst	= daddr;
1675 	rth->rt_dst	= daddr;
1676 	rth->fl.fl4_tos	= tos;
1677 	rth->fl.mark    = skb->mark;
1678 	rth->fl.fl4_src	= saddr;
1679 	rth->rt_src	= saddr;
1680 	rth->rt_gateway	= daddr;
1681 	rth->rt_iif 	=
1682 		rth->fl.iif	= in_dev->dev->ifindex;
1683 	rth->u.dst.dev	= (out_dev)->dev;
1684 	dev_hold(rth->u.dst.dev);
1685 	rth->idev	= in_dev_get(rth->u.dst.dev);
1686 	rth->fl.oif 	= 0;
1687 	rth->rt_spec_dst= spec_dst;
1688 
1689 	rth->u.dst.input = ip_forward;
1690 	rth->u.dst.output = ip_output;
1691 
1692 	rt_set_nexthop(rth, res, itag);
1693 
1694 	rth->rt_flags = flags;
1695 
1696 	*result = rth;
1697 	err = 0;
1698  cleanup:
1699 	/* release the working reference to the output device */
1700 	in_dev_put(out_dev);
1701 	return err;
1702 }
1703 
1704 static inline int ip_mkroute_input(struct sk_buff *skb,
1705 				   struct fib_result* res,
1706 				   const struct flowi *fl,
1707 				   struct in_device *in_dev,
1708 				   __be32 daddr, __be32 saddr, u32 tos)
1709 {
1710 	struct rtable* rth = NULL;
1711 	int err;
1712 	unsigned hash;
1713 
1714 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1715 	if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1716 		fib_select_multipath(fl, res);
1717 #endif
1718 
1719 	/* create a routing cache entry */
1720 	err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1721 	if (err)
1722 		return err;
1723 
1724 	/* put it into the cache */
1725 	hash = rt_hash(daddr, saddr, fl->iif);
1726 	return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
1727 }
1728 
1729 /*
1730  *	NOTE. We drop all packets that have a local source
1731  *	address, because every properly looped-back packet
1732  *	must already have the correct destination attached by the output routine.
1733  *
1734  *	This approach solves two big problems:
1735  *	1. Non-simplex devices are handled properly.
1736  *	2. IP spoofing attempts are filtered with a 100% guarantee.
1737  */
1738 
1739 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1740 			       u8 tos, struct net_device *dev)
1741 {
1742 	struct fib_result res;
1743 	struct in_device *in_dev = in_dev_get(dev);
1744 	struct flowi fl = { .nl_u = { .ip4_u =
1745 				      { .daddr = daddr,
1746 					.saddr = saddr,
1747 					.tos = tos,
1748 					.scope = RT_SCOPE_UNIVERSE,
1749 				      } },
1750 			    .mark = skb->mark,
1751 			    .iif = dev->ifindex };
1752 	unsigned	flags = 0;
1753 	u32		itag = 0;
1754 	struct rtable * rth;
1755 	unsigned	hash;
1756 	__be32		spec_dst;
1757 	int		err = -EINVAL;
1758 	int		free_res = 0;
1759 
1760 	/* IP on this device is disabled. */
1761 
1762 	if (!in_dev)
1763 		goto out;
1764 
1765 	/* Check for the weirdest martians, which may not be detected
1766 	   by fib_lookup.
1767 	 */
1768 
1769 	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
1770 		goto martian_source;
1771 
1772 	if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
1773 		goto brd_input;
1774 
1775 	/* Accept zero addresses only to limited broadcast;
1776 	 * I do not even know whether to fix it or not. Waiting for complaints :-)
1777 	 */
1778 	if (ZERONET(saddr))
1779 		goto martian_source;
1780 
1781 	if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
1782 		goto martian_destination;
1783 
1784 	/*
1785 	 *	Now we are ready to route packet.
1786 	 */
1787 	if ((err = fib_lookup(&fl, &res)) != 0) {
1788 		if (!IN_DEV_FORWARD(in_dev))
1789 			goto e_hostunreach;
1790 		goto no_route;
1791 	}
1792 	free_res = 1;
1793 
1794 	RT_CACHE_STAT_INC(in_slow_tot);
1795 
1796 	if (res.type == RTN_BROADCAST)
1797 		goto brd_input;
1798 
1799 	if (res.type == RTN_LOCAL) {
1800 		int result;
1801 		result = fib_validate_source(saddr, daddr, tos,
1802 					     init_net.loopback_dev->ifindex,
1803 					     dev, &spec_dst, &itag);
1804 		if (result < 0)
1805 			goto martian_source;
1806 		if (result)
1807 			flags |= RTCF_DIRECTSRC;
1808 		spec_dst = daddr;
1809 		goto local_input;
1810 	}
1811 
1812 	if (!IN_DEV_FORWARD(in_dev))
1813 		goto e_hostunreach;
1814 	if (res.type != RTN_UNICAST)
1815 		goto martian_destination;
1816 
1817 	err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1818 done:
1819 	in_dev_put(in_dev);
1820 	if (free_res)
1821 		fib_res_put(&res);
1822 out:	return err;
1823 
1824 brd_input:
1825 	if (skb->protocol != htons(ETH_P_IP))
1826 		goto e_inval;
1827 
1828 	if (ZERONET(saddr))
1829 		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1830 	else {
1831 		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
1832 					  &itag);
1833 		if (err < 0)
1834 			goto martian_source;
1835 		if (err)
1836 			flags |= RTCF_DIRECTSRC;
1837 	}
1838 	flags |= RTCF_BROADCAST;
1839 	res.type = RTN_BROADCAST;
1840 	RT_CACHE_STAT_INC(in_brd);
1841 
1842 local_input:
1843 	rth = dst_alloc(&ipv4_dst_ops);
1844 	if (!rth)
1845 		goto e_nobufs;
1846 
1847 	rth->u.dst.output= ip_rt_bug;
1848 
1849 	atomic_set(&rth->u.dst.__refcnt, 1);
1850 	rth->u.dst.flags= DST_HOST;
1851 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1852 		rth->u.dst.flags |= DST_NOPOLICY;
1853 	rth->fl.fl4_dst	= daddr;
1854 	rth->rt_dst	= daddr;
1855 	rth->fl.fl4_tos	= tos;
1856 	rth->fl.mark    = skb->mark;
1857 	rth->fl.fl4_src	= saddr;
1858 	rth->rt_src	= saddr;
1859 #ifdef CONFIG_NET_CLS_ROUTE
1860 	rth->u.dst.tclassid = itag;
1861 #endif
1862 	rth->rt_iif	=
1863 	rth->fl.iif	= dev->ifindex;
1864 	rth->u.dst.dev	= init_net.loopback_dev;
1865 	dev_hold(rth->u.dst.dev);
1866 	rth->idev	= in_dev_get(rth->u.dst.dev);
1867 	rth->rt_gateway	= daddr;
1868 	rth->rt_spec_dst= spec_dst;
1869 	rth->u.dst.input= ip_local_deliver;
1870 	rth->rt_flags 	= flags|RTCF_LOCAL;
1871 	if (res.type == RTN_UNREACHABLE) {
1872 		rth->u.dst.input= ip_error;
1873 		rth->u.dst.error= -err;
1874 		rth->rt_flags 	&= ~RTCF_LOCAL;
1875 	}
1876 	rth->rt_type	= res.type;
1877 	hash = rt_hash(daddr, saddr, fl.iif);
1878 	err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
1879 	goto done;
1880 
1881 no_route:
1882 	RT_CACHE_STAT_INC(in_no_route);
1883 	spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
1884 	res.type = RTN_UNREACHABLE;
1885 	if (err == -ESRCH)
1886 		err = -ENETUNREACH;
1887 	goto local_input;
1888 
1889 	/*
1890 	 *	Do not cache martian addresses: they should be logged (RFC1812)
1891 	 */
1892 martian_destination:
1893 	RT_CACHE_STAT_INC(in_martian_dst);
1894 #ifdef CONFIG_IP_ROUTE_VERBOSE
1895 	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
1896 		printk(KERN_WARNING "martian destination %u.%u.%u.%u from "
1897 			"%u.%u.%u.%u, dev %s\n",
1898 			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
1899 #endif
1900 
1901 e_hostunreach:
1902 	err = -EHOSTUNREACH;
1903 	goto done;
1904 
1905 e_inval:
1906 	err = -EINVAL;
1907 	goto done;
1908 
1909 e_nobufs:
1910 	err = -ENOBUFS;
1911 	goto done;
1912 
1913 martian_source:
1914 	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
1915 	goto e_inval;
1916 }
1917 
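/*
 * Input route fast path: look the flow (daddr, saddr, iif, tos, mark) up
 * in the route cache under RCU.  On a miss, multicast destinations are
 * handled here (see the comment below) and everything else falls back to
 * ip_route_input_slow().
 */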
1918 int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1919 		   u8 tos, struct net_device *dev)
1920 {
1921 	struct rtable * rth;
1922 	unsigned	hash;
1923 	int iif = dev->ifindex;
1924 
1925 	tos &= IPTOS_RT_MASK;
1926 	hash = rt_hash(daddr, saddr, iif);
1927 
1928 	rcu_read_lock();
1929 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1930 	     rth = rcu_dereference(rth->u.dst.rt_next)) {
1931 		if (rth->fl.fl4_dst == daddr &&
1932 		    rth->fl.fl4_src == saddr &&
1933 		    rth->fl.iif == iif &&
1934 		    rth->fl.oif == 0 &&
1935 		    rth->fl.mark == skb->mark &&
1936 		    rth->fl.fl4_tos == tos) {
1937 			dst_use(&rth->u.dst, jiffies);
1938 			RT_CACHE_STAT_INC(in_hit);
1939 			rcu_read_unlock();
1940 			skb->dst = (struct dst_entry*)rth;
1941 			return 0;
1942 		}
1943 		RT_CACHE_STAT_INC(in_hlist_search);
1944 	}
1945 	rcu_read_unlock();
1946 
1947 	/* Multicast recognition logic was moved from the route cache to here.
1948 	   The problem was that too many Ethernet cards have broken/missing
1949 	   hardware multicast filters :-( As a result, a host on a multicast
1950 	   network acquires a lot of useless route cache entries, e.g. for
1951 	   SDR messages from all over the world. Now we try to get rid of them.
1952 	   Really, provided the software IP multicast filter is organized
1953 	   reasonably (at least, hashed), it does not result in a slowdown
1954 	   compared with route cache reject entries.
1955 	   Note that multicast routers are not affected, because a
1956 	   route cache entry is created for them eventually.
1957 	 */
1958 	if (MULTICAST(daddr)) {
1959 		struct in_device *in_dev;
1960 
1961 		rcu_read_lock();
1962 		if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
1963 			int our = ip_check_mc(in_dev, daddr, saddr,
1964 				ip_hdr(skb)->protocol);
1965 			if (our
1966 #ifdef CONFIG_IP_MROUTE
1967 			    || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
1968 #endif
1969 			    ) {
1970 				rcu_read_unlock();
1971 				return ip_route_input_mc(skb, daddr, saddr,
1972 							 tos, dev, our);
1973 			}
1974 		}
1975 		rcu_read_unlock();
1976 		return -EINVAL;
1977 	}
1978 	return ip_route_input_slow(skb, daddr, saddr, tos, dev);
1979 }
1980 
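/*
 * Build a new cache entry for an output route: classify the destination
 * (broadcast/multicast/unicast), allocate the rtable, fill in the flow
 * keys and device references, and select the input/output handlers
 * (ip_output, ip_local_deliver, ip_mc_output, ip_mr_input) according to
 * the route flags.
 */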
1981 static inline int __mkroute_output(struct rtable **result,
1982 				   struct fib_result* res,
1983 				   const struct flowi *fl,
1984 				   const struct flowi *oldflp,
1985 				   struct net_device *dev_out,
1986 				   unsigned flags)
1987 {
1988 	struct rtable *rth;
1989 	struct in_device *in_dev;
1990 	u32 tos = RT_FL_TOS(oldflp);
1991 	int err = 0;
1992 
1993 	if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
1994 		return -EINVAL;
1995 
1996 	if (fl->fl4_dst == htonl(0xFFFFFFFF))
1997 		res->type = RTN_BROADCAST;
1998 	else if (MULTICAST(fl->fl4_dst))
1999 		res->type = RTN_MULTICAST;
2000 	else if (BADCLASS(fl->fl4_dst) || ZERONET(fl->fl4_dst))
2001 		return -EINVAL;
2002 
2003 	if (dev_out->flags & IFF_LOOPBACK)
2004 		flags |= RTCF_LOCAL;
2005 
2006 	/* get a working reference to the inet device */
2007 	in_dev = in_dev_get(dev_out);
2008 	if (!in_dev)
2009 		return -EINVAL;
2010 
2011 	if (res->type == RTN_BROADCAST) {
2012 		flags |= RTCF_BROADCAST | RTCF_LOCAL;
2013 		if (res->fi) {
2014 			fib_info_put(res->fi);
2015 			res->fi = NULL;
2016 		}
2017 	} else if (res->type == RTN_MULTICAST) {
2018 		flags |= RTCF_MULTICAST|RTCF_LOCAL;
2019 		if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
2020 				 oldflp->proto))
2021 			flags &= ~RTCF_LOCAL;
2022 		/* If a multicast route does not exist, use the
2023 		   default one, but do not gateway in this case.
2024 		   Yes, it is a hack.
2025 		 */
2026 		if (res->fi && res->prefixlen < 4) {
2027 			fib_info_put(res->fi);
2028 			res->fi = NULL;
2029 		}
2030 	}
2031 
2032 
2033 	rth = dst_alloc(&ipv4_dst_ops);
2034 	if (!rth) {
2035 		err = -ENOBUFS;
2036 		goto cleanup;
2037 	}
2038 
2039 	atomic_set(&rth->u.dst.__refcnt, 1);
2040 	rth->u.dst.flags= DST_HOST;
2041 	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
2042 		rth->u.dst.flags |= DST_NOXFRM;
2043 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2044 		rth->u.dst.flags |= DST_NOPOLICY;
2045 
2046 	rth->fl.fl4_dst	= oldflp->fl4_dst;
2047 	rth->fl.fl4_tos	= tos;
2048 	rth->fl.fl4_src	= oldflp->fl4_src;
2049 	rth->fl.oif	= oldflp->oif;
2050 	rth->fl.mark    = oldflp->mark;
2051 	rth->rt_dst	= fl->fl4_dst;
2052 	rth->rt_src	= fl->fl4_src;
2053 	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
2054 	/* get references to the devices that are to be held by the routing
2055 	   cache entry */
2056 	rth->u.dst.dev	= dev_out;
2057 	dev_hold(dev_out);
2058 	rth->idev	= in_dev_get(dev_out);
2059 	rth->rt_gateway = fl->fl4_dst;
2060 	rth->rt_spec_dst= fl->fl4_src;
2061 
2062 	rth->u.dst.output=ip_output;
2063 
2064 	RT_CACHE_STAT_INC(out_slow_tot);
2065 
2066 	if (flags & RTCF_LOCAL) {
2067 		rth->u.dst.input = ip_local_deliver;
2068 		rth->rt_spec_dst = fl->fl4_dst;
2069 	}
2070 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2071 		rth->rt_spec_dst = fl->fl4_src;
2072 		if (flags & RTCF_LOCAL &&
2073 		    !(dev_out->flags & IFF_LOOPBACK)) {
2074 			rth->u.dst.output = ip_mc_output;
2075 			RT_CACHE_STAT_INC(out_slow_mc);
2076 		}
2077 #ifdef CONFIG_IP_MROUTE
2078 		if (res->type == RTN_MULTICAST) {
2079 			if (IN_DEV_MFORWARD(in_dev) &&
2080 			    !LOCAL_MCAST(oldflp->fl4_dst)) {
2081 				rth->u.dst.input = ip_mr_input;
2082 				rth->u.dst.output = ip_mc_output;
2083 			}
2084 		}
2085 #endif
2086 	}
2087 
2088 	rt_set_nexthop(rth, res, 0);
2089 
2090 	rth->rt_flags = flags;
2091 
2092 	*result = rth;
2093  cleanup:
2094 	/* release the working reference to the inet device */
2095 	in_dev_put(in_dev);
2096 
2097 	return err;
2098 }
2099 
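/*
 * Wrapper around __mkroute_output(): on success, hash the flow keys and
 * insert the new entry into the route cache via rt_intern_hash().
 */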
2100 static inline int ip_mkroute_output(struct rtable **rp,
2101 				    struct fib_result* res,
2102 				    const struct flowi *fl,
2103 				    const struct flowi *oldflp,
2104 				    struct net_device *dev_out,
2105 				    unsigned flags)
2106 {
2107 	struct rtable *rth = NULL;
2108 	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
2109 	unsigned hash;
2110 	if (err == 0) {
2111 		hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
2112 		err = rt_intern_hash(hash, rth, rp);
2113 	}
2114 
2115 	return err;
2116 }
2117 
2118 /*
2119  * Major route resolver routine.
2120  */
2121 
2122 static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
2123 {
2124 	u32 tos	= RT_FL_TOS(oldflp);
2125 	struct flowi fl = { .nl_u = { .ip4_u =
2126 				      { .daddr = oldflp->fl4_dst,
2127 					.saddr = oldflp->fl4_src,
2128 					.tos = tos & IPTOS_RT_MASK,
2129 					.scope = ((tos & RTO_ONLINK) ?
2130 						  RT_SCOPE_LINK :
2131 						  RT_SCOPE_UNIVERSE),
2132 				      } },
2133 			    .mark = oldflp->mark,
2134 			    .iif = init_net.loopback_dev->ifindex,
2135 			    .oif = oldflp->oif };
2136 	struct fib_result res;
2137 	unsigned flags = 0;
2138 	struct net_device *dev_out = NULL;
2139 	int free_res = 0;
2140 	int err;
2141 
2142 
2143 	res.fi		= NULL;
2144 #ifdef CONFIG_IP_MULTIPLE_TABLES
2145 	res.r		= NULL;
2146 #endif
2147 
2148 	if (oldflp->fl4_src) {
2149 		err = -EINVAL;
2150 		if (MULTICAST(oldflp->fl4_src) ||
2151 		    BADCLASS(oldflp->fl4_src) ||
2152 		    ZERONET(oldflp->fl4_src))
2153 			goto out;
2154 
2155 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
2156 		dev_out = ip_dev_find(oldflp->fl4_src);
2157 		if (dev_out == NULL)
2158 			goto out;
2159 
2160 		/* I removed the check for oif == dev_out->oif here.
2161 		   It was wrong for two reasons:
2162 		   1. ip_dev_find(saddr) can return the wrong iface if saddr is
2163 		      assigned to multiple interfaces.
2164 		   2. Moreover, we are allowed to send packets with the saddr
2165 		      of another iface. --ANK
2166 		 */
2167 
2168 		if (oldflp->oif == 0
2169 		    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
2170 			/* Special hack: the user can direct multicasts
2171 			   and limited broadcast via the necessary interface
2172 			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2173 			   This hack is not just for fun, it allows
2174 			   vic, vat and friends to work.
2175 			   They bind the socket to loopback, set the ttl to zero
2176 			   and expect that it will work.
2177 			   From the viewpoint of the routing cache they are broken,
2178 			   because we are not allowed to build a multicast path
2179 			   with a loopback source addr (look, the routing cache
2180 			   cannot know that the ttl is zero, so the packet
2181 			   will not leave this host and the route is valid).
2182 			   Luckily, this hack is a good workaround.
2183 			 */
2184 
2185 			fl.oif = dev_out->ifindex;
2186 			goto make_route;
2187 		}
2188 		if (dev_out)
2189 			dev_put(dev_out);
2190 		dev_out = NULL;
2191 	}
2192 
2193 
2194 	if (oldflp->oif) {
2195 		dev_out = dev_get_by_index(&init_net, oldflp->oif);
2196 		err = -ENODEV;
2197 		if (dev_out == NULL)
2198 			goto out;
2199 
2200 		/* RACE: Check return value of inet_select_addr instead. */
2201 		if (__in_dev_get_rtnl(dev_out) == NULL) {
2202 			dev_put(dev_out);
2203 			goto out;	/* Wrong error code */
2204 		}
2205 
2206 		if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
2207 			if (!fl.fl4_src)
2208 				fl.fl4_src = inet_select_addr(dev_out, 0,
2209 							      RT_SCOPE_LINK);
2210 			goto make_route;
2211 		}
2212 		if (!fl.fl4_src) {
2213 			if (MULTICAST(oldflp->fl4_dst))
2214 				fl.fl4_src = inet_select_addr(dev_out, 0,
2215 							      fl.fl4_scope);
2216 			else if (!oldflp->fl4_dst)
2217 				fl.fl4_src = inet_select_addr(dev_out, 0,
2218 							      RT_SCOPE_HOST);
2219 		}
2220 	}
2221 
2222 	if (!fl.fl4_dst) {
2223 		fl.fl4_dst = fl.fl4_src;
2224 		if (!fl.fl4_dst)
2225 			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
2226 		if (dev_out)
2227 			dev_put(dev_out);
2228 		dev_out = init_net.loopback_dev;
2229 		dev_hold(dev_out);
2230 		fl.oif = init_net.loopback_dev->ifindex;
2231 		res.type = RTN_LOCAL;
2232 		flags |= RTCF_LOCAL;
2233 		goto make_route;
2234 	}
2235 
2236 	if (fib_lookup(&fl, &res)) {
2237 		res.fi = NULL;
2238 		if (oldflp->oif) {
2239 			/* Apparently, the routing tables are wrong. Assume
2240 			   that the destination is on-link.
2241 
2242 			   WHY? DW.
2243 			   Because we are allowed to send to an iface
2244 			   even if it has NO routes and NO assigned
2245 			   addresses. When oif is specified, the routing
2246 			   tables are looked up with only one purpose:
2247 			   to catch whether the destination is gatewayed, rather
2248 			   than direct. Moreover, if MSG_DONTROUTE is set,
2249 			   we send the packet, ignoring both routing tables
2250 			   and ifaddr state. --ANK
2251 
2252 
2253 			   We could do this even when oif is unknown,
2254 			   as IPv6 likely does, but we do not.
2255 			 */
2256 
2257 			if (fl.fl4_src == 0)
2258 				fl.fl4_src = inet_select_addr(dev_out, 0,
2259 							      RT_SCOPE_LINK);
2260 			res.type = RTN_UNICAST;
2261 			goto make_route;
2262 		}
2263 		if (dev_out)
2264 			dev_put(dev_out);
2265 		err = -ENETUNREACH;
2266 		goto out;
2267 	}
2268 	free_res = 1;
2269 
2270 	if (res.type == RTN_LOCAL) {
2271 		if (!fl.fl4_src)
2272 			fl.fl4_src = fl.fl4_dst;
2273 		if (dev_out)
2274 			dev_put(dev_out);
2275 		dev_out = init_net.loopback_dev;
2276 		dev_hold(dev_out);
2277 		fl.oif = dev_out->ifindex;
2278 		if (res.fi)
2279 			fib_info_put(res.fi);
2280 		res.fi = NULL;
2281 		flags |= RTCF_LOCAL;
2282 		goto make_route;
2283 	}
2284 
2285 #ifdef CONFIG_IP_ROUTE_MULTIPATH
2286 	if (res.fi->fib_nhs > 1 && fl.oif == 0)
2287 		fib_select_multipath(&fl, &res);
2288 	else
2289 #endif
2290 	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
2291 		fib_select_default(&fl, &res);
2292 
2293 	if (!fl.fl4_src)
2294 		fl.fl4_src = FIB_RES_PREFSRC(res);
2295 
2296 	if (dev_out)
2297 		dev_put(dev_out);
2298 	dev_out = FIB_RES_DEV(res);
2299 	dev_hold(dev_out);
2300 	fl.oif = dev_out->ifindex;
2301 
2302 
2303 make_route:
2304 	err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
2305 
2306 
2307 	if (free_res)
2308 		fib_res_put(&res);
2309 	if (dev_out)
2310 		dev_put(dev_out);
2311 out:	return err;
2312 }
2313 
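/*
 * Output route fast path: scan the route cache chain for a matching flow
 * (dst, src, oif, mark, tos) and fall back to ip_route_output_slow() on
 * a miss.
 */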
2314 int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
2315 {
2316 	unsigned hash;
2317 	struct rtable *rth;
2318 
2319 	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
2320 
2321 	rcu_read_lock_bh();
2322 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2323 		rth = rcu_dereference(rth->u.dst.rt_next)) {
2324 		if (rth->fl.fl4_dst == flp->fl4_dst &&
2325 		    rth->fl.fl4_src == flp->fl4_src &&
2326 		    rth->fl.iif == 0 &&
2327 		    rth->fl.oif == flp->oif &&
2328 		    rth->fl.mark == flp->mark &&
2329 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2330 			    (IPTOS_RT_MASK | RTO_ONLINK))) {
2331 			dst_use(&rth->u.dst, jiffies);
2332 			RT_CACHE_STAT_INC(out_hit);
2333 			rcu_read_unlock_bh();
2334 			*rp = rth;
2335 			return 0;
2336 		}
2337 		RT_CACHE_STAT_INC(out_hlist_search);
2338 	}
2339 	rcu_read_unlock_bh();
2340 
2341 	return ip_route_output_slow(rp, flp);
2342 }
2343 
2344 EXPORT_SYMBOL_GPL(__ip_route_output_key);
2345 
2346 static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
2347 {
2348 }
2349 
2350 static struct dst_ops ipv4_dst_blackhole_ops = {
2351 	.family			=	AF_INET,
2352 	.protocol		=	__constant_htons(ETH_P_IP),
2353 	.destroy		=	ipv4_dst_destroy,
2354 	.check			=	ipv4_dst_check,
2355 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
2356 	.entry_size		=	sizeof(struct rtable),
2357 };
2358 
2359 
2360 static int ipv4_blackhole_output(struct sk_buff *skb)
2361 {
2362 	kfree_skb(skb);
2363 	return 0;
2364 }
2365 
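/*
 * Replace *rp with a "blackhole" copy of the route: a dst entry carrying
 * the same metadata and metrics as the original, but whose input and
 * output handlers simply free the skb.  Used below when the xfrm lookup
 * returns -EREMOTE.
 */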
2366 static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock *sk)
2367 {
2368 	struct rtable *ort = *rp;
2369 	struct rtable *rt = (struct rtable *)
2370 		dst_alloc(&ipv4_dst_blackhole_ops);
2371 
2372 	if (rt) {
2373 		struct dst_entry *new = &rt->u.dst;
2374 
2375 		atomic_set(&new->__refcnt, 1);
2376 		new->__use = 1;
2377 		new->input = ipv4_blackhole_output;
2378 		new->output = ipv4_blackhole_output;
2379 		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
2380 
2381 		new->dev = ort->u.dst.dev;
2382 		if (new->dev)
2383 			dev_hold(new->dev);
2384 
2385 		rt->fl = ort->fl;
2386 
2387 		rt->idev = ort->idev;
2388 		if (rt->idev)
2389 			in_dev_hold(rt->idev);
2390 		rt->rt_flags = ort->rt_flags;
2391 		rt->rt_type = ort->rt_type;
2392 		rt->rt_dst = ort->rt_dst;
2393 		rt->rt_src = ort->rt_src;
2394 		rt->rt_iif = ort->rt_iif;
2395 		rt->rt_gateway = ort->rt_gateway;
2396 		rt->rt_spec_dst = ort->rt_spec_dst;
2397 		rt->peer = ort->peer;
2398 		if (rt->peer)
2399 			atomic_inc(&rt->peer->refcnt);
2400 
2401 		dst_free(new);
2402 	}
2403 
2404 	dst_release(&(*rp)->u.dst);
2405 	*rp = rt;
2406 	return (rt ? 0 : -ENOMEM);
2407 }
2408 
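/*
 * Resolve an output route and, when the flow carries a protocol, run the
 * result through the xfrm (IPsec) lookup; a -EREMOTE result is converted
 * into a blackhole route so the caller still gets a usable dst.
 */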
2409 int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
2410 {
2411 	int err;
2412 
2413 	if ((err = __ip_route_output_key(rp, flp)) != 0)
2414 		return err;
2415 
2416 	if (flp->proto) {
2417 		if (!flp->fl4_src)
2418 			flp->fl4_src = (*rp)->rt_src;
2419 		if (!flp->fl4_dst)
2420 			flp->fl4_dst = (*rp)->rt_dst;
2421 		err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
2422 		if (err == -EREMOTE)
2423 			err = ipv4_dst_blackhole(rp, flp, sk);
2424 
2425 		return err;
2426 	}
2427 
2428 	return 0;
2429 }
2430 
2431 EXPORT_SYMBOL_GPL(ip_route_output_flow);
2432 
2433 int ip_route_output_key(struct rtable **rp, struct flowi *flp)
2434 {
2435 	return ip_route_output_flow(rp, flp, NULL, 0);
2436 }
2437 
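/*
 * Fill a netlink RTM_NEWROUTE message describing skb->dst: the rtmsg
 * header, the route attributes (dst, src, oif, prefsrc, gateway, metrics,
 * cacheinfo) and, for forwarded multicast, the data from ipmr_get_route().
 */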
2438 static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2439 			int nowait, unsigned int flags)
2440 {
2441 	struct rtable *rt = (struct rtable*)skb->dst;
2442 	struct rtmsg *r;
2443 	struct nlmsghdr *nlh;
2444 	long expires;
2445 	u32 id = 0, ts = 0, tsage = 0, error;
2446 
2447 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
2448 	if (nlh == NULL)
2449 		return -EMSGSIZE;
2450 
2451 	r = nlmsg_data(nlh);
2452 	r->rtm_family	 = AF_INET;
2453 	r->rtm_dst_len	= 32;
2454 	r->rtm_src_len	= 0;
2455 	r->rtm_tos	= rt->fl.fl4_tos;
2456 	r->rtm_table	= RT_TABLE_MAIN;
2457 	NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
2458 	r->rtm_type	= rt->rt_type;
2459 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
2460 	r->rtm_protocol = RTPROT_UNSPEC;
2461 	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2462 	if (rt->rt_flags & RTCF_NOTIFY)
2463 		r->rtm_flags |= RTM_F_NOTIFY;
2464 
2465 	NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);
2466 
2467 	if (rt->fl.fl4_src) {
2468 		r->rtm_src_len = 32;
2469 		NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
2470 	}
2471 	if (rt->u.dst.dev)
2472 		NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
2473 #ifdef CONFIG_NET_CLS_ROUTE
2474 	if (rt->u.dst.tclassid)
2475 		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
2476 #endif
2477 	if (rt->fl.iif)
2478 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
2479 	else if (rt->rt_src != rt->fl.fl4_src)
2480 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
2481 
2482 	if (rt->rt_dst != rt->rt_gateway)
2483 		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
2484 
2485 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2486 		goto nla_put_failure;
2487 
2488 	error = rt->u.dst.error;
2489 	expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
2490 	if (rt->peer) {
2491 		id = rt->peer->ip_id_count;
2492 		if (rt->peer->tcp_ts_stamp) {
2493 			ts = rt->peer->tcp_ts;
2494 			tsage = get_seconds() - rt->peer->tcp_ts_stamp;
2495 		}
2496 	}
2497 
2498 	if (rt->fl.iif) {
2499 #ifdef CONFIG_IP_MROUTE
2500 		__be32 dst = rt->rt_dst;
2501 
2502 		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
2503 		    IPV4_DEVCONF_ALL(MC_FORWARDING)) {
2504 			int err = ipmr_get_route(skb, r, nowait);
2505 			if (err <= 0) {
2506 				if (!nowait) {
2507 					if (err == 0)
2508 						return 0;
2509 					goto nla_put_failure;
2510 				} else {
2511 					if (err == -EMSGSIZE)
2512 						goto nla_put_failure;
2513 					error = err;
2514 				}
2515 			}
2516 		} else
2517 #endif
2518 			NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
2519 	}
2520 
2521 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage,
2522 			       expires, error) < 0)
2523 		goto nla_put_failure;
2524 
2525 	return nlmsg_end(skb, nlh);
2526 
2527 nla_put_failure:
2528 	nlmsg_cancel(skb, nlh);
2529 	return -EMSGSIZE;
2530 }
2531 
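/*
 * RTM_GETROUTE handler: build a dummy skb, resolve the requested route
 * through ip_route_input() (when an input interface is given) or
 * ip_route_output_key(), and answer with an rt_fill_info() reply.
 */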
2532 static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2533 {
2534 	struct rtmsg *rtm;
2535 	struct nlattr *tb[RTA_MAX+1];
2536 	struct rtable *rt = NULL;
2537 	__be32 dst = 0;
2538 	__be32 src = 0;
2539 	u32 iif;
2540 	int err;
2541 	struct sk_buff *skb;
2542 
2543 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2544 	if (err < 0)
2545 		goto errout;
2546 
2547 	rtm = nlmsg_data(nlh);
2548 
2549 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2550 	if (skb == NULL) {
2551 		err = -ENOBUFS;
2552 		goto errout;
2553 	}
2554 
2555 	/* Reserve room for dummy headers; this skb can pass
2556 	   through a good chunk of the routing engine.
2557 	 */
2558 	skb_reset_mac_header(skb);
2559 	skb_reset_network_header(skb);
2560 
2561 	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
2562 	ip_hdr(skb)->protocol = IPPROTO_ICMP;
2563 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2564 
2565 	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
2566 	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
2567 	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
2568 
2569 	if (iif) {
2570 		struct net_device *dev;
2571 
2572 		dev = __dev_get_by_index(&init_net, iif);
2573 		if (dev == NULL) {
2574 			err = -ENODEV;
2575 			goto errout_free;
2576 		}
2577 
2578 		skb->protocol	= htons(ETH_P_IP);
2579 		skb->dev	= dev;
2580 		local_bh_disable();
2581 		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2582 		local_bh_enable();
2583 
2584 		rt = (struct rtable*) skb->dst;
2585 		if (err == 0 && rt->u.dst.error)
2586 			err = -rt->u.dst.error;
2587 	} else {
2588 		struct flowi fl = {
2589 			.nl_u = {
2590 				.ip4_u = {
2591 					.daddr = dst,
2592 					.saddr = src,
2593 					.tos = rtm->rtm_tos,
2594 				},
2595 			},
2596 			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2597 		};
2598 		err = ip_route_output_key(&rt, &fl);
2599 	}
2600 
2601 	if (err)
2602 		goto errout_free;
2603 
2604 	skb->dst = &rt->u.dst;
2605 	if (rtm->rtm_flags & RTM_F_NOTIFY)
2606 		rt->rt_flags |= RTCF_NOTIFY;
2607 
2608 	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2609 				RTM_NEWROUTE, 0, 0);
2610 	if (err <= 0)
2611 		goto errout_free;
2612 
2613 	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2614 errout:
2615 	return err;
2616 
2617 errout_free:
2618 	kfree_skb(skb);
2619 	goto errout;
2620 }
2621 
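/*
 * Dump the route cache: walk every hash chain under rcu_read_lock_bh(),
 * emit one RTM_NEWROUTE message per entry and record the resume point in
 * cb->args[].
 */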
2622 int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
2623 {
2624 	struct rtable *rt;
2625 	int h, s_h;
2626 	int idx, s_idx;
2627 
2628 	s_h = cb->args[0];
2629 	if (s_h < 0)
2630 		s_h = 0;
2631 	s_idx = idx = cb->args[1];
2632 	for (h = s_h; h <= rt_hash_mask; h++) {
2633 		rcu_read_lock_bh();
2634 		for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
2635 		     rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
2636 			if (idx < s_idx)
2637 				continue;
2638 			skb->dst = dst_clone(&rt->u.dst);
2639 			if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
2640 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2641 					 1, NLM_F_MULTI) <= 0) {
2642 				dst_release(xchg(&skb->dst, NULL));
2643 				rcu_read_unlock_bh();
2644 				goto done;
2645 			}
2646 			dst_release(xchg(&skb->dst, NULL));
2647 		}
2648 		rcu_read_unlock_bh();
2649 		s_idx = 0;
2650 	}
2651 
2652 done:
2653 	cb->args[0] = h;
2654 	cb->args[1] = idx;
2655 	return skb->len;
2656 }
2657 
2658 void ip_rt_multicast_event(struct in_device *in_dev)
2659 {
2660 	rt_cache_flush(0);
2661 }
2662 
2663 #ifdef CONFIG_SYSCTL
2664 static int flush_delay;
2665 
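/*
 * Writing to the "flush" sysctl parses the requested delay into
 * flush_delay and flushes the route cache; reads are rejected.
 */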
2666 static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
2667 					struct file *filp, void __user *buffer,
2668 					size_t *lenp, loff_t *ppos)
2669 {
2670 	if (write) {
2671 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2672 		rt_cache_flush(flush_delay);
2673 		return 0;
2674 	}
2675 
2676 	return -EINVAL;
2677 }
2678 
2679 static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
2680 						int __user *name,
2681 						int nlen,
2682 						void __user *oldval,
2683 						size_t __user *oldlenp,
2684 						void __user *newval,
2685 						size_t newlen)
2686 {
2687 	int delay;
2688 	if (newlen != sizeof(int))
2689 		return -EINVAL;
2690 	if (get_user(delay, (int __user *)newval))
2691 		return -EFAULT;
2692 	rt_cache_flush(delay);
2693 	return 0;
2694 }
2695 
2696 ctl_table ipv4_route_table[] = {
2697 	{
2698 		.ctl_name 	= NET_IPV4_ROUTE_FLUSH,
2699 		.procname	= "flush",
2700 		.data		= &flush_delay,
2701 		.maxlen		= sizeof(int),
2702 		.mode		= 0200,
2703 		.proc_handler	= &ipv4_sysctl_rtcache_flush,
2704 		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
2705 	},
2706 	{
2707 		.ctl_name	= NET_IPV4_ROUTE_MIN_DELAY,
2708 		.procname	= "min_delay",
2709 		.data		= &ip_rt_min_delay,
2710 		.maxlen		= sizeof(int),
2711 		.mode		= 0644,
2712 		.proc_handler	= &proc_dointvec_jiffies,
2713 		.strategy	= &sysctl_jiffies,
2714 	},
2715 	{
2716 		.ctl_name	= NET_IPV4_ROUTE_MAX_DELAY,
2717 		.procname	= "max_delay",
2718 		.data		= &ip_rt_max_delay,
2719 		.maxlen		= sizeof(int),
2720 		.mode		= 0644,
2721 		.proc_handler	= &proc_dointvec_jiffies,
2722 		.strategy	= &sysctl_jiffies,
2723 	},
2724 	{
2725 		.ctl_name	= NET_IPV4_ROUTE_GC_THRESH,
2726 		.procname	= "gc_thresh",
2727 		.data		= &ipv4_dst_ops.gc_thresh,
2728 		.maxlen		= sizeof(int),
2729 		.mode		= 0644,
2730 		.proc_handler	= &proc_dointvec,
2731 	},
2732 	{
2733 		.ctl_name	= NET_IPV4_ROUTE_MAX_SIZE,
2734 		.procname	= "max_size",
2735 		.data		= &ip_rt_max_size,
2736 		.maxlen		= sizeof(int),
2737 		.mode		= 0644,
2738 		.proc_handler	= &proc_dointvec,
2739 	},
2740 	{
2741 		/*  Deprecated. Use gc_min_interval_ms */
2742 
2743 		.ctl_name	= NET_IPV4_ROUTE_GC_MIN_INTERVAL,
2744 		.procname	= "gc_min_interval",
2745 		.data		= &ip_rt_gc_min_interval,
2746 		.maxlen		= sizeof(int),
2747 		.mode		= 0644,
2748 		.proc_handler	= &proc_dointvec_jiffies,
2749 		.strategy	= &sysctl_jiffies,
2750 	},
2751 	{
2752 		.ctl_name	= NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS,
2753 		.procname	= "gc_min_interval_ms",
2754 		.data		= &ip_rt_gc_min_interval,
2755 		.maxlen		= sizeof(int),
2756 		.mode		= 0644,
2757 		.proc_handler	= &proc_dointvec_ms_jiffies,
2758 		.strategy	= &sysctl_ms_jiffies,
2759 	},
2760 	{
2761 		.ctl_name	= NET_IPV4_ROUTE_GC_TIMEOUT,
2762 		.procname	= "gc_timeout",
2763 		.data		= &ip_rt_gc_timeout,
2764 		.maxlen		= sizeof(int),
2765 		.mode		= 0644,
2766 		.proc_handler	= &proc_dointvec_jiffies,
2767 		.strategy	= &sysctl_jiffies,
2768 	},
2769 	{
2770 		.ctl_name	= NET_IPV4_ROUTE_GC_INTERVAL,
2771 		.procname	= "gc_interval",
2772 		.data		= &ip_rt_gc_interval,
2773 		.maxlen		= sizeof(int),
2774 		.mode		= 0644,
2775 		.proc_handler	= &proc_dointvec_jiffies,
2776 		.strategy	= &sysctl_jiffies,
2777 	},
2778 	{
2779 		.ctl_name	= NET_IPV4_ROUTE_REDIRECT_LOAD,
2780 		.procname	= "redirect_load",
2781 		.data		= &ip_rt_redirect_load,
2782 		.maxlen		= sizeof(int),
2783 		.mode		= 0644,
2784 		.proc_handler	= &proc_dointvec,
2785 	},
2786 	{
2787 		.ctl_name	= NET_IPV4_ROUTE_REDIRECT_NUMBER,
2788 		.procname	= "redirect_number",
2789 		.data		= &ip_rt_redirect_number,
2790 		.maxlen		= sizeof(int),
2791 		.mode		= 0644,
2792 		.proc_handler	= &proc_dointvec,
2793 	},
2794 	{
2795 		.ctl_name	= NET_IPV4_ROUTE_REDIRECT_SILENCE,
2796 		.procname	= "redirect_silence",
2797 		.data		= &ip_rt_redirect_silence,
2798 		.maxlen		= sizeof(int),
2799 		.mode		= 0644,
2800 		.proc_handler	= &proc_dointvec,
2801 	},
2802 	{
2803 		.ctl_name	= NET_IPV4_ROUTE_ERROR_COST,
2804 		.procname	= "error_cost",
2805 		.data		= &ip_rt_error_cost,
2806 		.maxlen		= sizeof(int),
2807 		.mode		= 0644,
2808 		.proc_handler	= &proc_dointvec,
2809 	},
2810 	{
2811 		.ctl_name	= NET_IPV4_ROUTE_ERROR_BURST,
2812 		.procname	= "error_burst",
2813 		.data		= &ip_rt_error_burst,
2814 		.maxlen		= sizeof(int),
2815 		.mode		= 0644,
2816 		.proc_handler	= &proc_dointvec,
2817 	},
2818 	{
2819 		.ctl_name	= NET_IPV4_ROUTE_GC_ELASTICITY,
2820 		.procname	= "gc_elasticity",
2821 		.data		= &ip_rt_gc_elasticity,
2822 		.maxlen		= sizeof(int),
2823 		.mode		= 0644,
2824 		.proc_handler	= &proc_dointvec,
2825 	},
2826 	{
2827 		.ctl_name	= NET_IPV4_ROUTE_MTU_EXPIRES,
2828 		.procname	= "mtu_expires",
2829 		.data		= &ip_rt_mtu_expires,
2830 		.maxlen		= sizeof(int),
2831 		.mode		= 0644,
2832 		.proc_handler	= &proc_dointvec_jiffies,
2833 		.strategy	= &sysctl_jiffies,
2834 	},
2835 	{
2836 		.ctl_name	= NET_IPV4_ROUTE_MIN_PMTU,
2837 		.procname	= "min_pmtu",
2838 		.data		= &ip_rt_min_pmtu,
2839 		.maxlen		= sizeof(int),
2840 		.mode		= 0644,
2841 		.proc_handler	= &proc_dointvec,
2842 	},
2843 	{
2844 		.ctl_name	= NET_IPV4_ROUTE_MIN_ADVMSS,
2845 		.procname	= "min_adv_mss",
2846 		.data		= &ip_rt_min_advmss,
2847 		.maxlen		= sizeof(int),
2848 		.mode		= 0644,
2849 		.proc_handler	= &proc_dointvec,
2850 	},
2851 	{
2852 		.ctl_name	= NET_IPV4_ROUTE_SECRET_INTERVAL,
2853 		.procname	= "secret_interval",
2854 		.data		= &ip_rt_secret_interval,
2855 		.maxlen		= sizeof(int),
2856 		.mode		= 0644,
2857 		.proc_handler	= &proc_dointvec_jiffies,
2858 		.strategy	= &sysctl_jiffies,
2859 	},
2860 	{ .ctl_name = 0 }
2861 };
2862 #endif
2863 
2864 #ifdef CONFIG_NET_CLS_ROUTE
2865 struct ip_rt_acct *ip_rt_acct;
2866 
2867 /* This code sucks.  But you should have seen it before! --RR */
2868 
2869 /* IP route accounting ptr for this logical cpu number. */
2870 #define IP_RT_ACCT_CPU(i) (ip_rt_acct + i * 256)
2871 
2872 #ifdef CONFIG_PROC_FS
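/*
 * /proc read handler for "rt_acct": sum the per-CPU route accounting
 * counters into the caller's buffer, 32 bits at a time.
 */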
2873 static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
2874 			   int length, int *eof, void *data)
2875 {
2876 	unsigned int i;
2877 
2878 	if ((offset & 3) || (length & 3))
2879 		return -EIO;
2880 
2881 	if (offset >= sizeof(struct ip_rt_acct) * 256) {
2882 		*eof = 1;
2883 		return 0;
2884 	}
2885 
2886 	if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
2887 		length = sizeof(struct ip_rt_acct) * 256 - offset;
2888 		*eof = 1;
2889 	}
2890 
2891 	offset /= sizeof(u32);
2892 
2893 	if (length > 0) {
2894 		u32 *dst = (u32 *) buffer;
2895 
2896 		*start = buffer;
2897 		memset(dst, 0, length);
2898 
2899 		for_each_possible_cpu(i) {
2900 			unsigned int j;
2901 			u32 *src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;
2902 
2903 			for (j = 0; j < length/4; j++)
2904 				dst[j] += src[j];
2905 		}
2906 	}
2907 	return length;
2908 }
2909 #endif /* CONFIG_PROC_FS */
2910 #endif /* CONFIG_NET_CLS_ROUTE */
2911 
2912 static __initdata unsigned long rhash_entries;
2913 static int __init set_rhash_entries(char *str)
2914 {
2915 	if (!str)
2916 		return 0;
2917 	rhash_entries = simple_strtoul(str, &str, 0);
2918 	return 1;
2919 }
2920 __setup("rhash_entries=", set_rhash_entries);
2921 
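/*
 * Module init: seed the hash secret, allocate the dst slab cache and the
 * route cache hash table, size the garbage-collector thresholds, start
 * the flush/secret-rebuild timers, create the /proc entries and register
 * the RTM_GETROUTE handler.
 */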
2922 int __init ip_rt_init(void)
2923 {
2924 	int rc = 0;
2925 
2926 	rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
2927 			     (jiffies ^ (jiffies >> 7)));
2928 
2929 #ifdef CONFIG_NET_CLS_ROUTE
2930 	{
2931 	int order;
2932 	for (order = 0;
2933 	     (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
2934 		/* NOTHING */;
2935 	ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order);
2936 	if (!ip_rt_acct)
2937 		panic("IP: failed to allocate ip_rt_acct\n");
2938 	memset(ip_rt_acct, 0, PAGE_SIZE << order);
2939 	}
2940 #endif
2941 
2942 	ipv4_dst_ops.kmem_cachep =
2943 		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
2944 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
2945 
2946 	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
2947 
2948 	rt_hash_table = (struct rt_hash_bucket *)
2949 		alloc_large_system_hash("IP route cache",
2950 					sizeof(struct rt_hash_bucket),
2951 					rhash_entries,
2952 					(num_physpages >= 128 * 1024) ?
2953 					15 : 17,
2954 					0,
2955 					&rt_hash_log,
2956 					&rt_hash_mask,
2957 					0);
2958 	memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
2959 	rt_hash_lock_init();
2960 
2961 	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
2962 	ip_rt_max_size = (rt_hash_mask + 1) * 16;
2963 
2964 	devinet_init();
2965 	ip_fib_init();
2966 
2967 	init_timer(&rt_flush_timer);
2968 	rt_flush_timer.function = rt_run_flush;
2969 	init_timer(&rt_secret_timer);
2970 	rt_secret_timer.function = rt_secret_rebuild;
2971 
2972 	/* All the timers started at system startup tend
2973 	   to synchronize. Perturb them a bit.
2974 	 */
2975 	schedule_delayed_work(&expires_work,
2976 		net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
2977 
2978 	rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
2979 		ip_rt_secret_interval;
2980 	add_timer(&rt_secret_timer);
2981 
2982 #ifdef CONFIG_PROC_FS
2983 	{
2984 	struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
2985 	if (!proc_net_fops_create(&init_net, "rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
2986 	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
2987 					     init_net.proc_net_stat))) {
2988 		return -ENOMEM;
2989 	}
2990 	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
2991 	}
2992 #ifdef CONFIG_NET_CLS_ROUTE
2993 	create_proc_read_entry("rt_acct", 0, init_net.proc_net, ip_rt_acct_read, NULL);
2994 #endif
2995 #endif
2996 #ifdef CONFIG_XFRM
2997 	xfrm_init();
2998 	xfrm4_init();
2999 #endif
3000 	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
3001 
3002 	return rc;
3003 }
3004 
3005 EXPORT_SYMBOL(__ip_select_ident);
3006 EXPORT_SYMBOL(ip_route_input);
3007 EXPORT_SYMBOL(ip_route_output_key);
3008