/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Version:	$Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/ip_mp_alg.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#define RT_FL_TOS(oldflp) \
    ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))

#define IP_MAX_MTU	0xFFF0

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_min_delay		= 2 * HZ;
static int ip_rt_max_delay		= 10 * HZ;
static int ip_rt_max_size;
static int ip_rt_gc_timeout		= RT_GC_TIMEOUT;
static int ip_rt_gc_interval		= 60 * HZ;
static int ip_rt_gc_min_interval	= HZ / 2;
static int ip_rt_redirect_number	= 9;
static int ip_rt_redirect_load		= HZ / 50;
static int ip_rt_redirect_silence	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost		= HZ;
static int ip_rt_error_burst		= 5 * HZ;
static int ip_rt_gc_elasticity		= 8;
static int ip_rt_mtu_expires		= 10 * 60 * HZ;
static int ip_rt_min_pmtu		= 512 + 20 + 20;
static int ip_rt_min_advmss		= 256;
static int ip_rt_secret_interval	= 10 * 60 * HZ;
static unsigned long rt_deadline;
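/*
 * Note the relationship between the redirect tunables above: with these
 * defaults, ip_rt_redirect_silence == ip_rt_redirect_load << (9 + 1),
 * i.e. the silence period equals the backoff step that would follow the
 * last of the ip_rt_redirect_number allowed redirects.  See the
 * algorithm description above ip_rt_send_redirect() below.
 */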
#define RTprint(a...)	printk(KERN_DEBUG a)

static struct timer_list rt_flush_timer;
static struct timer_list rt_periodic_timer;
static struct timer_list rt_secret_timer;

/*
 *	Interface to generic destination cache.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static void		 ipv4_dst_destroy(struct dst_entry *dst);
static void		 ipv4_dst_ifdown(struct dst_entry *dst,
					 struct net_device *dev, int how);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void		 ipv4_link_failure(struct sk_buff *skb);
static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
static int rt_garbage_collect(void);


static struct dst_ops ipv4_dst_ops = {
	.family			= AF_INET,
	.protocol		= __constant_htons(ETH_P_IP),
	.gc			= rt_garbage_collect,
	.check			= ipv4_dst_check,
	.destroy		= ipv4_dst_destroy,
	.ifdown			= ipv4_dst_ifdown,
	.negative_advice	= ipv4_negative_advice,
	.link_failure		= ipv4_link_failure,
	.update_pmtu		= ip_rt_update_pmtu,
	.entry_size		= sizeof(struct rtable),
};

#define ECN_OR_COST(class)	TC_PRIO_##class

__u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(FILLER),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
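/*
 * The table above is indexed by the four TOS bits shifted right by one,
 * so even slots are "pure" TOS values and odd slots carry the low
 * cost/ECN bit, hence the ECN_OR_COST() entries.  For example, a packet
 * with IPTOS_LOWDELAY (0x10) selects slot 8, TC_PRIO_INTERACTIVE.
 */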
/*
 * Route cache.
 */

/* The locking scheme is rather straightforward:
 *
 * 1) Read-Copy Update protects the buckets of the central route hash.
 * 2) Only writers remove entries, and they hold the lock
 *    as they look at rtable reference counts.
 * 3) Only readers acquire references to rtable entries,
 *    they do so with atomic increments and with the
 *    lock held.
 */

struct rt_hash_bucket {
	struct rtable	*chain;
};
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
/*
 * Instead of using one spinlock for each rt_hash_bucket, we use a table
 * of spinlocks.  The size of this table is a power of two and depends on
 * the number of CPUs.
 */
#if NR_CPUS >= 32
#define RT_HASH_LOCK_SZ	4096
#elif NR_CPUS >= 16
#define RT_HASH_LOCK_SZ	2048
#elif NR_CPUS >= 8
#define RT_HASH_LOCK_SZ	1024
#elif NR_CPUS >= 4
#define RT_HASH_LOCK_SZ	512
#else
#define RT_HASH_LOCK_SZ	256
#endif

static spinlock_t	*rt_hash_locks;
# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
# define rt_hash_lock_init()	{ \
		int i; \
		rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
		if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
		for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
			spin_lock_init(&rt_hash_locks[i]); \
		}
#else
# define rt_hash_lock_addr(slot) NULL
# define rt_hash_lock_init()
#endif

static struct rt_hash_bucket	*rt_hash_table;
static unsigned			rt_hash_mask;
static int			rt_hash_log;
static unsigned int		rt_hash_rnd;

struct rt_cache_stat *rt_cache_stat;

static int rt_intern_hash(unsigned hash, struct rtable *rth,
				struct rtable **res);

static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos)
{
	return (jhash_3words(daddr, saddr, (u32) tos, rt_hash_rnd)
		& rt_hash_mask);
}
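/*
 * For illustration: jhash_3words() mixes both addresses and the TOS with
 * the random rt_hash_rnd, and the mask keeps only rt_hash_log low bits,
 * selecting one of the rt_hash_mask + 1 buckets.  Since rt_hash_rnd is
 * regenerated on every flush (rt_run_flush(), driven periodically by
 * rt_secret_timer), chain placement cannot be predicted from outside.
 */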
#ifdef CONFIG_PROC_FS
struct rt_cache_iter_state {
	int bucket;
};

static struct rtable *rt_cache_get_first(struct seq_file *seq)
{
	struct rtable *r = NULL;
	struct rt_cache_iter_state *st = seq->private;

	for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
		rcu_read_lock_bh();
		r = rt_hash_table[st->bucket].chain;
		if (r)
			break;
		rcu_read_unlock_bh();
	}
	return r;
}

static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
{
	struct rt_cache_iter_state *st = rcu_dereference(seq->private);

	r = r->u.rt_next;
	while (!r) {
		rcu_read_unlock_bh();
		if (--st->bucket < 0)
			break;
		rcu_read_lock_bh();
		r = rt_hash_table[st->bucket].chain;
	}
	return r;
}

static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
{
	struct rtable *r = rt_cache_get_first(seq);

	if (r)
		while (pos && (r = rt_cache_get_next(seq, r)))
			--pos;
	return pos ? NULL : r;
}

static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct rtable *r = NULL;

	if (v == SEQ_START_TOKEN)
		r = rt_cache_get_first(seq);
	else
		r = rt_cache_get_next(seq, v);
	++*pos;
	return r;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
	if (v && v != SEQ_START_TOKEN)
		rcu_read_unlock_bh();
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	else {
		struct rtable *r = v;
		char temp[256];

		sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t"
			      "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X",
			r->u.dst.dev ? r->u.dst.dev->name : "*",
			(unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
			r->rt_flags, atomic_read(&r->u.dst.__refcnt),
			r->u.dst.__use, 0, (unsigned long)r->rt_src,
			(dst_metric(&r->u.dst, RTAX_ADVMSS) ?
			     (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
			dst_metric(&r->u.dst, RTAX_WINDOW),
			(int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) +
			      dst_metric(&r->u.dst, RTAX_RTTVAR)),
			r->fl.fl4_tos,
			r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1,
			r->u.dst.hh ? (r->u.dst.hh->hh_output ==
				       dev_queue_xmit) : 0,
			r->rt_spec_dst);
		seq_printf(seq, "%-127s\n", temp);
	}
	return 0;
}

static struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	struct seq_file *seq;
	int rc = -ENOMEM;
	struct rt_cache_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		goto out;
	rc = seq_open(file, &rt_cache_seq_ops);
	if (rc)
		goto out_kfree;
	seq	     = file->private_data;
	seq->private = s;
	memset(s, 0, sizeof(*s));
out:
	return rc;
out_kfree:
	kfree(s);
	goto out;
}

static struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_private,
};
static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(rt_cache_stat, cpu);
	}
	return NULL;
}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   atomic_read(&ipv4_dst_ops.entries),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}

static struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};

static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* CONFIG_PROC_FS */

static __inline__ void rt_free(struct rtable *rt)
{
	multipath_remove(rt);
	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}

static __inline__ void rt_drop(struct rtable *rt)
{
	multipath_remove(rt);
	ip_rt_put(rt);
	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}

static __inline__ int rt_fast_clean(struct rtable *rth)
{
	/* Kill broadcast/multicast entries very aggressively, if they
	   collide in hash table with more useful entries */
	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
		rth->fl.iif && rth->u.rt_next;
}

static __inline__ int rt_valuable(struct rtable *rth)
{
	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
		rth->u.dst.expires;
}

static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
{
	unsigned long age;
	int ret = 0;

	if (atomic_read(&rth->u.dst.__refcnt))
		goto out;

	ret = 1;
	if (rth->u.dst.expires &&
	    time_after_eq(jiffies, rth->u.dst.expires))
		goto out;

	age = jiffies - rth->u.dst.lastuse;
	ret = 0;
	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
	    (age <= tmo2 && rt_valuable(rth)))
		goto out;
	ret = 1;
out:	return ret;
}

/* Bits of score are:
 * 31: very valuable
 * 30: not quite useless
 * 29..0: usage counter
 */
static inline u32 rt_score(struct rtable *rt)
{
	u32 score = jiffies - rt->u.dst.lastuse;

	score = ~score & ~(3<<30);

	if (rt_valuable(rt))
		score |= (1<<31);

	if (!rt->fl.iif ||
	    !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
		score |= (1<<30);

	return score;
}
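/*
 * The eviction candidate in rt_intern_hash() below is the entry with the
 * *lowest* score.  Since the age is inverted, older entries score lower;
 * bits 30 and 31 then lift output/unicast routes and "valuable" entries
 * above any input broadcast/multicast entry of similar age.
 */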
static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
{
	return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
	       fl1->oif == fl2->oif &&
	       fl1->iif == fl2->iif;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
						struct rtable *expentry,
						int *removed_count)
{
	int passedexpired = 0;
	struct rtable **nextstep = NULL;
	struct rtable **rthp = chain_head;
	struct rtable *rth;

	if (removed_count)
		*removed_count = 0;

	while ((rth = *rthp) != NULL) {
		if (rth == expentry)
			passedexpired = 1;

		if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 &&
		    compare_keys(&(*rthp)->fl, &expentry->fl)) {
			if (*rthp == expentry) {
				*rthp = rth->u.rt_next;
				continue;
			} else {
				*rthp = rth->u.rt_next;
				rt_free(rth);
				if (removed_count)
					++(*removed_count);
			}
		} else {
			if (!((*rthp)->u.dst.flags & DST_BALANCED) &&
			    passedexpired && !nextstep)
				nextstep = &rth->u.rt_next;

			rthp = &rth->u.rt_next;
		}
	}

	rt_free(expentry);
	if (removed_count)
		++(*removed_count);

	return nextstep;
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */


/* This runs via a timer and thus is always in BH context. */
static void rt_check_expire(unsigned long dummy)
{
	static unsigned int rover;
	unsigned int i = rover, goal;
	struct rtable *rth, **rthp;
	unsigned long now = jiffies;
	u64 mult;

	mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
	if (ip_rt_gc_timeout > 1)
		do_div(mult, ip_rt_gc_timeout);
	goal = (unsigned int)mult;
	if (goal > rt_hash_mask) goal = rt_hash_mask + 1;
	for (; goal > 0; goal--) {
		unsigned long tmo = ip_rt_gc_timeout;

		i = (i + 1) & rt_hash_mask;
		rthp = &rt_hash_table[i].chain;

		if (*rthp == 0)
			continue;
		spin_lock(rt_hash_lock_addr(i));
		while ((rth = *rthp) != NULL) {
			if (rth->u.dst.expires) {
				/* Entry is expired even if it is in use */
				if (time_before_eq(now, rth->u.dst.expires)) {
					tmo >>= 1;
					rthp = &rth->u.rt_next;
					continue;
				}
			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
				tmo >>= 1;
				rthp = &rth->u.rt_next;
				continue;
			}

			/* Cleanup aged off entries. */
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
			/* remove all related balanced entries if necessary */
			if (rth->u.dst.flags & DST_BALANCED) {
				rthp = rt_remove_balanced_route(
					&rt_hash_table[i].chain,
					rth, NULL);
				if (!rthp)
					break;
			} else {
				*rthp = rth->u.rt_next;
				rt_free(rth);
			}
#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
			*rthp = rth->u.rt_next;
			rt_free(rth);
#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
		}
		spin_unlock(rt_hash_lock_addr(i));

		/* Fallback loop breaker. */
		if (time_after(jiffies, now))
			break;
	}
	rover = i;
	mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
}
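/*
 * A worked example of the goal computation above, assuming the defaults
 * ip_rt_gc_interval = 60*HZ and ip_rt_gc_timeout = 300*HZ with a
 * 4096-bucket table (rt_hash_log == 12):
 *
 *	goal = (60*HZ << 12) / (300*HZ) = 819
 *
 * i.e. each run scans roughly interval/timeout (one fifth) of the table,
 * so every bucket is visited about once per gc_timeout.
 */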
/* This can run from both BH and non-BH contexts, the latter
 * in the case of a forced flush event.
 */
static void rt_run_flush(unsigned long dummy)
{
	int i;
	struct rtable *rth, *next;

	rt_deadline = 0;

	get_random_bytes(&rt_hash_rnd, 4);

	for (i = rt_hash_mask; i >= 0; i--) {
		spin_lock_bh(rt_hash_lock_addr(i));
		rth = rt_hash_table[i].chain;
		if (rth)
			rt_hash_table[i].chain = NULL;
		spin_unlock_bh(rt_hash_lock_addr(i));

		for (; rth; rth = next) {
			next = rth->u.rt_next;
			rt_free(rth);
		}
	}
}

static DEFINE_SPINLOCK(rt_flush_lock);

void rt_cache_flush(int delay)
{
	unsigned long now = jiffies;
	int user_mode = !in_softirq();

	if (delay < 0)
		delay = ip_rt_min_delay;

	/* flush existing multipath state */
	multipath_flush();

	spin_lock_bh(&rt_flush_lock);

	if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
		long tmo = (long)(rt_deadline - now);

		/* If the flush timer is already running
		   and the flush request is not immediate (delay > 0):

		   if the deadline has not been reached, prolong the timer
		   to "delay", otherwise fire it at the deadline time.
		 */

		if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay)
			tmo = 0;

		if (delay > tmo)
			delay = tmo;
	}

	if (delay <= 0) {
		spin_unlock_bh(&rt_flush_lock);
		rt_run_flush(0);
		return;
	}

	if (rt_deadline == 0)
		rt_deadline = now + ip_rt_max_delay;

	mod_timer(&rt_flush_timer, now+delay);
	spin_unlock_bh(&rt_flush_lock);
}

static void rt_secret_rebuild(unsigned long dummy)
{
	unsigned long now = jiffies;

	rt_cache_flush(0);
	mod_timer(&rt_secret_timer, now + ip_rt_secret_interval);
}

/*
   Short description of GC goals.

   We want an algorithm that keeps the routing cache at some equilibrium
   point, where the number of aged-off entries stays approximately equal
   to the number of newly generated ones.

   The current expiration strength is the variable "expire".
   We try to adjust it dynamically, so that when the network is idle
   "expire" is large enough to keep plenty of warm entries, and when the
   load increases it shrinks to limit the cache size.
 */
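/*
 * For instance, while entries pour in with the table full, "expire" is
 * halved on every pass that misses its goal, so eviction pressure ramps
 * up quickly; once a run succeeds, the work_done path below grows
 * "expire" back by ip_rt_gc_min_interval per call until it saturates at
 * ip_rt_gc_timeout.
 */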
static int rt_garbage_collect(void)
{
	static unsigned long expire = RT_GC_TIMEOUT;
	static unsigned long last_gc;
	static int rover;
	static int equilibrium;
	struct rtable *rth, **rthp;
	unsigned long now = jiffies;
	int goal;

	/*
	 * Garbage collection is pretty expensive,
	 * do not run it too frequently.
	 */

	RT_CACHE_STAT_INC(gc_total);

	if (now - last_gc < ip_rt_gc_min_interval &&
	    atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
		RT_CACHE_STAT_INC(gc_ignored);
		goto out;
	}

	/* Calculate the number of entries that we want to expire now. */
	goal = atomic_read(&ipv4_dst_ops.entries) -
		(ip_rt_gc_elasticity << rt_hash_log);
	if (goal <= 0) {
		if (equilibrium < ipv4_dst_ops.gc_thresh)
			equilibrium = ipv4_dst_ops.gc_thresh;
		goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
		if (goal > 0) {
			equilibrium += min_t(unsigned int, goal / 2, rt_hash_mask + 1);
			goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
		}
	} else {
		/* We are in a dangerous area. Try to reduce the cache
		 * really aggressively.
		 */
		goal = max_t(unsigned int, goal / 2, rt_hash_mask + 1);
		equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
	}

	if (now - last_gc >= ip_rt_gc_min_interval)
		last_gc = now;

	if (goal <= 0) {
		equilibrium += goal;
		goto work_done;
	}

	do {
		int i, k;

		for (i = rt_hash_mask, k = rover; i >= 0; i--) {
			unsigned long tmo = expire;

			k = (k + 1) & rt_hash_mask;
			rthp = &rt_hash_table[k].chain;
			spin_lock_bh(rt_hash_lock_addr(k));
			while ((rth = *rthp) != NULL) {
				if (!rt_may_expire(rth, tmo, expire)) {
					tmo >>= 1;
					rthp = &rth->u.rt_next;
					continue;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
				/* remove all related balanced entries
				 * if necessary
				 */
				if (rth->u.dst.flags & DST_BALANCED) {
					int r;

					/* must use the locked bucket k here,
					 * not the outer counter i
					 */
					rthp = rt_remove_balanced_route(
						&rt_hash_table[k].chain,
						rth,
						&r);
					goal -= r;
					if (!rthp)
						break;
				} else {
					*rthp = rth->u.rt_next;
					rt_free(rth);
					goal--;
				}
#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
				*rthp = rth->u.rt_next;
				rt_free(rth);
				goal--;
#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
			}
			spin_unlock_bh(rt_hash_lock_addr(k));
			if (goal <= 0)
				break;
		}
		rover = k;

		if (goal <= 0)
			goto work_done;

		/* Goal is not achieved. We stop the process if:

		   - expire has been reduced to zero; otherwise expire is halved.
		   - the table is not full.
		   - we are called from interrupt context.
		   - the jiffies check is just a fallback/debug loop breaker.
		   We will not spin here for a long time in any case.
		 */

		RT_CACHE_STAT_INC(gc_goal_miss);

		if (expire == 0)
			break;

		expire >>= 1;
#if RT_CACHE_DEBUG >= 2
		printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
				atomic_read(&ipv4_dst_ops.entries), goal, i);
#endif

		if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
			goto out;
	} while (!in_softirq() && time_before_eq(jiffies, now));

	if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
		goto out;
	if (net_ratelimit())
		printk(KERN_WARNING "dst cache overflow\n");
	RT_CACHE_STAT_INC(gc_dst_overflow);
	return 1;

work_done:
	expire += ip_rt_gc_min_interval;
	if (expire > ip_rt_gc_timeout ||
	    atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
		expire = ip_rt_gc_timeout;
#if RT_CACHE_DEBUG >= 2
	printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
			atomic_read(&ipv4_dst_ops.entries), goal, rover);
#endif
out:	return 0;
}
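/*
 * Insert a freshly created route into the hash.  If an equivalent entry
 * is already present it is promoted to the head of its chain and handed
 * back in *rp while the new route is dropped; otherwise the new route is
 * linked in and returned.  On error the new route is dropped and a
 * negative errno is returned.
 */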
static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
{
	struct rtable	*rth, **rthp;
	unsigned long	now;
	struct rtable *cand, **candp;
	u32		min_score;
	int		chain_length;
	int attempts = !in_softirq();

restart:
	chain_length = 0;
	min_score = ~(u32)0;
	cand = NULL;
	candp = NULL;
	now = jiffies;

	rthp = &rt_hash_table[hash].chain;

	spin_lock_bh(rt_hash_lock_addr(hash));
	while ((rth = *rthp) != NULL) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
		if (!(rth->u.dst.flags & DST_BALANCED) &&
		    compare_keys(&rth->fl, &rt->fl)) {
#else
		if (compare_keys(&rth->fl, &rt->fl)) {
#endif
			/* Put it first */
			*rthp = rth->u.rt_next;
			/*
			 * Since lookup is lockfree, the deletion
			 * must be visible to another weakly ordered CPU before
			 * the insertion at the start of the hash chain.
			 */
			rcu_assign_pointer(rth->u.rt_next,
					   rt_hash_table[hash].chain);
			/*
			 * Since lookup is lockfree, the update writes
			 * must be ordered for consistency on SMP.
			 */
			rcu_assign_pointer(rt_hash_table[hash].chain, rth);

			rth->u.dst.__use++;
			dst_hold(&rth->u.dst);
			rth->u.dst.lastuse = now;
			spin_unlock_bh(rt_hash_lock_addr(hash));

			rt_drop(rt);
			*rp = rth;
			return 0;
		}

		if (!atomic_read(&rth->u.dst.__refcnt)) {
			u32 score = rt_score(rth);

			if (score <= min_score) {
				cand = rth;
				candp = rthp;
				min_score = score;
			}
		}

		chain_length++;

		rthp = &rth->u.rt_next;
	}

	if (cand) {
		/* ip_rt_gc_elasticity used to be the average chain length;
		 * when it is exceeded, gc becomes really aggressive.
		 *
		 * The second limit is less certain. At the moment it allows
		 * only 2 entries per bucket. We will see.
		 */
		if (chain_length > ip_rt_gc_elasticity) {
			*candp = cand->u.rt_next;
			rt_free(cand);
		}
	}

	/* Try to bind the route to ARP only if it is an output
	   route or on the unicast forwarding path.
	 */
	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
		int err = arp_bind_neighbour(&rt->u.dst);
		if (err) {
			spin_unlock_bh(rt_hash_lock_addr(hash));

			if (err != -ENOBUFS) {
				rt_drop(rt);
				return err;
			}

			/* Neighbour tables are full and nothing
			   can be released. Try to shrink the route cache;
			   it most likely holds some neighbour records.
			 */
			if (attempts-- > 0) {
				int saved_elasticity = ip_rt_gc_elasticity;
				int saved_int = ip_rt_gc_min_interval;
				ip_rt_gc_elasticity	= 1;
				ip_rt_gc_min_interval	= 0;
				rt_garbage_collect();
				ip_rt_gc_min_interval	= saved_int;
				ip_rt_gc_elasticity	= saved_elasticity;
				goto restart;
			}

			if (net_ratelimit())
				printk(KERN_WARNING "Neighbour table overflow.\n");
			rt_drop(rt);
			return -ENOBUFS;
		}
	}

	rt->u.rt_next = rt_hash_table[hash].chain;
#if RT_CACHE_DEBUG >= 2
	if (rt->u.rt_next) {
		struct rtable *trt;
		printk(KERN_DEBUG "rt_cache @%02x: %u.%u.%u.%u", hash,
		       NIPQUAD(rt->rt_dst));
		for (trt = rt->u.rt_next; trt; trt = trt->u.rt_next)
			printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst));
		printk("\n");
	}
#endif
	rt_hash_table[hash].chain = rt;
	spin_unlock_bh(rt_hash_lock_addr(hash));
	*rp = rt;
	return 0;
}
void rt_bind_peer(struct rtable *rt, int create)
{
	static DEFINE_SPINLOCK(rt_peer_lock);
	struct inet_peer *peer;

	peer = inet_getpeer(rt->rt_dst, create);

	spin_lock_bh(&rt_peer_lock);
	if (rt->peer == NULL) {
		rt->peer = peer;
		peer = NULL;
	}
	spin_unlock_bh(&rt_peer_lock);
	if (peer)
		inet_putpeer(peer);
}

/*
 * Peer allocation may fail only in serious out-of-memory conditions.
 * However, we can still generate some output.
 * Random ID selection looks a bit dangerous because we have no chance of
 * selecting an ID that stays unique for a reasonable period of time.
 * But a broken packet identifier may be better than no packet at all.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id(ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;
	spin_unlock_bh(&ip_fb_id_lock);
}

void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
	struct rtable *rt = (struct rtable *) dst;

	if (rt) {
		if (rt->peer == NULL)
			rt_bind_peer(rt, 1);

		/* If a peer is attached to the destination, it is never
		   detached, so we need not grab a lock to dereference it.
		 */
		if (rt->peer) {
			iph->id = htons(inet_getid(rt->peer, more));
			return;
		}
	} else
		printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
		       __builtin_return_address(0));

	ip_select_fb_ident(iph);
}

static void rt_del(unsigned hash, struct rtable *rt)
{
	struct rtable **rthp;

	spin_lock_bh(rt_hash_lock_addr(hash));
	ip_rt_put(rt);
	for (rthp = &rt_hash_table[hash].chain; *rthp;
	     rthp = &(*rthp)->u.rt_next)
		if (*rthp == rt) {
			*rthp = rt->u.rt_next;
			rt_free(rt);
			break;
		}
	spin_unlock_bh(rt_hash_lock_addr(hash));
}
void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
		    u32 saddr, u8 tos, struct net_device *dev)
{
	int i, k;
	struct in_device *in_dev = in_dev_get(dev);
	struct rtable *rth, **rthp;
	u32 skeys[2] = { saddr, 0 };
	int ikeys[2] = { dev->ifindex, 0 };

	tos &= IPTOS_RT_MASK;

	if (!in_dev)
		return;

	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
	    || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	for (i = 0; i < 2; i++) {
		for (k = 0; k < 2; k++) {
			unsigned hash = rt_hash_code(daddr,
						     skeys[i] ^ (ikeys[k] << 5),
						     tos);

			rthp=&rt_hash_table[hash].chain;

			rcu_read_lock();
			while ((rth = rcu_dereference(*rthp)) != NULL) {
				struct rtable *rt;

				if (rth->fl.fl4_dst != daddr ||
				    rth->fl.fl4_src != skeys[i] ||
				    rth->fl.fl4_tos != tos ||
				    rth->fl.oif != ikeys[k] ||
				    rth->fl.iif != 0) {
					rthp = &rth->u.rt_next;
					continue;
				}

				if (rth->rt_dst != daddr ||
				    rth->rt_src != saddr ||
				    rth->u.dst.error ||
				    rth->rt_gateway != old_gw ||
				    rth->u.dst.dev != dev)
					break;

				dst_hold(&rth->u.dst);
				rcu_read_unlock();

				rt = dst_alloc(&ipv4_dst_ops);
				if (rt == NULL) {
					ip_rt_put(rth);
					in_dev_put(in_dev);
					return;
				}

				/* Copy all the information. */
				*rt = *rth;
				INIT_RCU_HEAD(&rt->u.dst.rcu_head);
				rt->u.dst.__use		= 1;
				atomic_set(&rt->u.dst.__refcnt, 1);
				rt->u.dst.child		= NULL;
				if (rt->u.dst.dev)
					dev_hold(rt->u.dst.dev);
				if (rt->idev)
					in_dev_hold(rt->idev);
				rt->u.dst.obsolete	= 0;
				rt->u.dst.lastuse	= jiffies;
				rt->u.dst.path		= &rt->u.dst;
				rt->u.dst.neighbour	= NULL;
				rt->u.dst.hh		= NULL;
				rt->u.dst.xfrm		= NULL;

				rt->rt_flags		|= RTCF_REDIRECTED;

				/* Gateway is different ... */
				rt->rt_gateway		= new_gw;

				/* Redirect received -> path was valid */
				dst_confirm(&rth->u.dst);

				if (rt->peer)
					atomic_inc(&rt->peer->refcnt);

				if (arp_bind_neighbour(&rt->u.dst) ||
				    !(rt->u.dst.neighbour->nud_state &
					    NUD_VALID)) {
					if (rt->u.dst.neighbour)
						neigh_event_send(rt->u.dst.neighbour, NULL);
					ip_rt_put(rth);
					rt_drop(rt);
					goto do_next;
				}

				rt_del(hash, rth);
				if (!rt_intern_hash(hash, rt, &rt))
					ip_rt_put(rt);
				goto do_next;
			}
			rcu_read_unlock();
		do_next:
			;
		}
	}
	in_dev_put(in_dev);
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
		printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about "
			"%u.%u.%u.%u ignored.\n"
			"  Advised path = %u.%u.%u.%u -> %u.%u.%u.%u, "
			"tos %02x\n",
		       NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
		       NIPQUAD(saddr), NIPQUAD(daddr), tos);
#endif
	in_dev_put(in_dev);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable*)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->u.dst.expires) {
			unsigned hash = rt_hash_code(rt->fl.fl4_dst,
						     rt->fl.fl4_src ^
							(rt->fl.oif << 5),
						     rt->fl.fl4_tos);
#if RT_CACHE_DEBUG >= 1
			printk(KERN_DEBUG "ip_rt_advice: redirect to "
					  "%u.%u.%u.%u/%02x dropped\n",
				NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
#endif
			rt_del(hash, rt);
			ret = NULL;
		}
	}
	return ret;
}

/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them at all,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   has forgotten the redirected route and start sending redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */
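/*
 * Worked example with the defaults, where ip_rt_redirect_load is HZ/50
 * (~20ms): a redirect goes out once jiffies passes
 * rate_last + (ip_rt_redirect_load << rate_tokens), i.e. after roughly
 * 20ms, 40ms, 80ms, ... 5.12s for the nine allowed redirects; after
 * ip_rt_redirect_silence (~20.5s) without triggering packets the token
 * count resets and the cycle may start again.
 */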
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = (struct rtable*)skb->dst;
	struct in_device *in_dev = in_dev_get(rt->u.dst.dev);

	if (!in_dev)
		return;

	if (!IN_DEV_TX_REDIRECTS(in_dev))
		goto out;

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence))
		rt->u.dst.rate_tokens = 0;

	/* Too many ignored redirects; do not send anything.
	 * Set u.dst.rate_last to the last seen redirected packet.
	 */
	if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
		rt->u.dst.rate_last = jiffies;
		goto out;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (time_after(jiffies,
		       (rt->u.dst.rate_last +
			(ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
		rt->u.dst.rate_last = jiffies;
		++rt->u.dst.rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (IN_DEV_LOG_MARTIANS(in_dev) &&
		    rt->u.dst.rate_tokens == ip_rt_redirect_number &&
		    net_ratelimit())
			printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores "
				"redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n",
				NIPQUAD(rt->rt_src), rt->rt_iif,
				NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
#endif
	}
out:
	in_dev_put(in_dev);
}

static int ip_error(struct sk_buff *skb)
{
	struct rtable *rt = (struct rtable*)skb->dst;
	unsigned long now;
	int code;

	switch (rt->u.dst.error) {
		case EINVAL:
		default:
			goto out;
		case EHOSTUNREACH:
			code = ICMP_HOST_UNREACH;
			break;
		case ENETUNREACH:
			code = ICMP_NET_UNREACH;
			break;
		case EACCES:
			code = ICMP_PKT_FILTERED;
			break;
	}

	now = jiffies;
	rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
	if (rt->u.dst.rate_tokens > ip_rt_error_burst)
		rt->u.dst.rate_tokens = ip_rt_error_burst;
	rt->u.dst.rate_last = now;
	if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
		rt->u.dst.rate_tokens -= ip_rt_error_cost;
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
	}

out:	kfree_skb(skb);
	return 0;
}
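/*
 * ip_error() above implements a classic token bucket: tokens accrue one
 * per jiffy since rate_last, are capped at ip_rt_error_burst (five
 * seconds' worth by default), and each ICMP_DEST_UNREACH sent costs
 * ip_rt_error_cost (one second's worth), limiting sustained output to
 * about one ICMP error per second with bursts of at most five.
 */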
/*
 *	The last two values are not from the RFC but
 *	are needed for AMPRnet AX.25 paths.
 */

static unsigned short mtu_plateau[] =
{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };

static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
		if (old_mtu > mtu_plateau[i])
			return mtu_plateau[i];
	return 68;
}

unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
{
	int i;
	unsigned short old_mtu = ntohs(iph->tot_len);
	struct rtable *rth;
	u32 skeys[2] = { iph->saddr, 0, };
	u32 daddr = iph->daddr;
	u8  tos = iph->tos & IPTOS_RT_MASK;
	unsigned short est_mtu = 0;

	if (ipv4_config.no_pmtu_disc)
		return 0;

	for (i = 0; i < 2; i++) {
		unsigned hash = rt_hash_code(daddr, skeys[i], tos);

		rcu_read_lock();
		for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
		     rth = rcu_dereference(rth->u.rt_next)) {
			if (rth->fl.fl4_dst == daddr &&
			    rth->fl.fl4_src == skeys[i] &&
			    rth->rt_dst == daddr &&
			    rth->rt_src == iph->saddr &&
			    rth->fl.fl4_tos == tos &&
			    rth->fl.iif == 0 &&
			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
				unsigned short mtu = new_mtu;

				if (new_mtu < 68 || new_mtu >= old_mtu) {

					/* BSD 4.2 compatibility hack :-( */
					if (mtu == 0 &&
					    old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] &&
					    old_mtu >= 68 + (iph->ihl << 2))
						old_mtu -= iph->ihl << 2;

					mtu = guess_mtu(old_mtu);
				}
				if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) {
					if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) {
						dst_confirm(&rth->u.dst);
						if (mtu < ip_rt_min_pmtu) {
							mtu = ip_rt_min_pmtu;
							rth->u.dst.metrics[RTAX_LOCK-1] |=
								(1 << RTAX_MTU);
						}
						rth->u.dst.metrics[RTAX_MTU-1] = mtu;
						dst_set_expires(&rth->u.dst,
							ip_rt_mtu_expires);
					}
					est_mtu = mtu;
				}
			}
		}
		rcu_read_unlock();
	}
	return est_mtu ? : new_mtu;
}
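/*
 * Example: a router reports "fragmentation needed" without a next-hop
 * MTU (new_mtu == 0) for a 1500-byte packet; guess_mtu() walks the
 * plateau table and returns 1492, the next value below the old MTU, as
 * in the RFC 1191 plateau search.
 */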
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 &&
	    !(dst_metric_locked(dst, RTAX_MTU))) {
		if (mtu < ip_rt_min_pmtu) {
			mtu = ip_rt_min_pmtu;
			dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
		}
		dst->metrics[RTAX_MTU-1] = mtu;
		dst_set_expires(dst, ip_rt_mtu_expires);
	}
}

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}

static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;
	struct inet_peer *peer = rt->peer;
	struct in_device *idev = rt->idev;

	if (peer) {
		rt->peer = NULL;
		inet_putpeer(peer);
	}

	if (idev) {
		rt->idev = NULL;
		in_dev_put(idev);
	}
}

static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
	struct rtable *rt = (struct rtable *) dst;
	struct in_device *idev = rt->idev;
	if (dev != &loopback_dev && idev && idev->dev == dev) {
		struct in_device *loopback_idev = in_dev_get(&loopback_dev);
		if (loopback_idev) {
			rt->idev = loopback_idev;
			in_dev_put(idev);
		}
	}
}

static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = (struct rtable *) skb->dst;
	if (rt)
		dst_set_expires(&rt->u.dst, 0);
}

static int ip_rt_bug(struct sk_buff *skb)
{
	printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
		NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr),
		skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	return 0;
}

/*
   We do not cache the source address of the outgoing interface,
   because it is used only by the IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is allowed to be unaligned
   in IP options!
 */
void ip_rt_get_source(u8 *addr, struct rtable *rt)
{
	u32 src;
	struct fib_result res;

	if (rt->fl.iif == 0)
		src = rt->rt_src;
	else if (fib_lookup(&rt->fl, &res) == 0) {
		src = FIB_RES_PREFSRC(res);
		fib_res_put(&res);
	} else
		src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
					RT_SCOPE_UNIVERSE);
	memcpy(addr, &src, 4);
}

#ifdef CONFIG_NET_CLS_ROUTE
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->u.dst.tclassid & 0xFFFF))
		rt->u.dst.tclassid |= tag & 0xFFFF;
	if (!(rt->u.dst.tclassid & 0xFFFF0000))
		rt->u.dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
{
	struct fib_info *fi = res->fi;

	if (fi) {
		if (FIB_RES_GW(*res) &&
		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
			rt->rt_gateway = FIB_RES_GW(*res);
		memcpy(rt->u.dst.metrics, fi->fib_metrics,
		       sizeof(rt->u.dst.metrics));
		if (fi->fib_mtu == 0) {
			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
			if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) &&
			    rt->rt_gateway != rt->rt_dst &&
			    rt->u.dst.dev->mtu > 576)
				rt->u.dst.metrics[RTAX_MTU-1] = 576;
		}
#ifdef CONFIG_NET_CLS_ROUTE
		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
	} else
		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;

	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
	if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
	if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
				       ip_rt_min_advmss);
	if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40)
		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;

#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, fib_rules_tclass(res));
#endif
	set_class_tag(rt, itag);
#endif
	rt->rt_type = res->type;
}
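/*
 * E.g. for a plain Ethernet route with no explicit FIB metrics: the MTU
 * defaults to dev->mtu (1500), the advertised MSS to 1500 - 40 = 1460
 * (but never below ip_rt_min_advmss), and the hop limit to
 * sysctl_ip_default_ttl.
 */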
static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
				u8 tos, struct net_device *dev, int our)
{
	unsigned hash;
	struct rtable *rth;
	u32 spec_dst;
	struct in_device *in_dev = in_dev_get(dev);
	u32 itag = 0;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ZERONET(saddr)) {
		if (!LOCAL_MCAST(daddr))
			goto e_inval;
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	} else if (fib_validate_source(saddr, 0, tos, 0,
					dev, &spec_dst, &itag) < 0)
		goto e_inval;

	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth)
		goto e_nobufs;

	rth->u.dst.output= ip_rt_bug;

	atomic_set(&rth->u.dst.__refcnt, 1);
	rth->u.dst.flags= DST_HOST;
	if (in_dev->cnf.no_policy)
		rth->u.dst.flags |= DST_NOPOLICY;
	rth->fl.fl4_dst	= daddr;
	rth->rt_dst	= daddr;
	rth->fl.fl4_tos	= tos;
#ifdef CONFIG_IP_ROUTE_FWMARK
	rth->fl.fl4_fwmark= skb->nfmark;
#endif
	rth->fl.fl4_src	= saddr;
	rth->rt_src	= saddr;
#ifdef CONFIG_NET_CLS_ROUTE
	rth->u.dst.tclassid = itag;
#endif
	rth->rt_iif	=
	rth->fl.iif	= dev->ifindex;
	rth->u.dst.dev	= &loopback_dev;
	dev_hold(rth->u.dst.dev);
	rth->idev	= in_dev_get(rth->u.dst.dev);
	rth->fl.oif	= 0;
	rth->rt_gateway	= daddr;
	rth->rt_spec_dst= spec_dst;
	rth->rt_type	= RTN_MULTICAST;
	rth->rt_flags	= RTCF_MULTICAST;
	if (our) {
		rth->u.dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->u.dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	in_dev_put(in_dev);
	hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos);
	return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);

e_nobufs:
	in_dev_put(in_dev);
	return -ENOBUFS;

e_inval:
	in_dev_put(in_dev);
	return -EINVAL;
}


static void ip_handle_martian_source(struct net_device *dev,
				     struct in_device *in_dev,
				     struct sk_buff *skb,
				     u32 daddr,
				     u32 saddr)
{
	RT_CACHE_STAT_INC(in_martian_src);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
		/*
		 *	RFC1812 recommendation: if the source is martian,
		 *	the only hint is the MAC header.
		 */
		printk(KERN_WARNING "martian source %u.%u.%u.%u from "
			"%u.%u.%u.%u, on dev %s\n",
			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
		if (dev->hard_header_len && skb->mac.raw) {
			int i;
			unsigned char *p = skb->mac.raw;
			printk(KERN_WARNING "ll header: ");
			for (i = 0; i < dev->hard_header_len; i++, p++) {
				printk("%02x", *p);
				if (i < (dev->hard_header_len - 1))
					printk(":");
			}
			printk("\n");
		}
	}
#endif
}
static inline int __mkroute_input(struct sk_buff *skb,
				  struct fib_result* res,
				  struct in_device *in_dev,
				  u32 daddr, u32 saddr, u32 tos,
				  struct rtable **result)
{

	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned flags = 0;
	u32 spec_dst, itag;

	/* get a working reference to the output device */
	out_dev = in_dev_get(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		if (net_ratelimit())
			printk(KERN_CRIT "Bug in ip_route_input" \
			       "_slow(). Please, report\n");
		return -EINVAL;
	}


	err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, &spec_dst, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		err = -EINVAL;
		goto cleanup;
	}

	if (err)
		flags |= RTCF_DIRECTSRC;

	if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		flags |= RTCF_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create a route if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 */
		if (out_dev == in_dev && !(flags & RTCF_DNAT)) {
			err = -EINVAL;
			goto cleanup;
		}
	}


	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->u.dst.flags= DST_HOST;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	if (res->fi->fib_nhs > 1)
		rth->u.dst.flags |= DST_BALANCED;
#endif
	if (in_dev->cnf.no_policy)
		rth->u.dst.flags |= DST_NOPOLICY;
	if (in_dev->cnf.no_xfrm)
		rth->u.dst.flags |= DST_NOXFRM;
	rth->fl.fl4_dst	= daddr;
	rth->rt_dst	= daddr;
	rth->fl.fl4_tos	= tos;
#ifdef CONFIG_IP_ROUTE_FWMARK
	rth->fl.fl4_fwmark= skb->nfmark;
#endif
	rth->fl.fl4_src	= saddr;
	rth->rt_src	= saddr;
	rth->rt_gateway	= daddr;
	rth->rt_iif	=
	rth->fl.iif	= in_dev->dev->ifindex;
	rth->u.dst.dev	= (out_dev)->dev;
	dev_hold(rth->u.dst.dev);
	rth->idev	= in_dev_get(rth->u.dst.dev);
	rth->fl.oif	= 0;
	rth->rt_spec_dst= spec_dst;

	rth->u.dst.input = ip_forward;
	rth->u.dst.output = ip_output;

	rt_set_nexthop(rth, res, itag);

	rth->rt_flags = flags;

	*result = rth;
	err = 0;
 cleanup:
	/* release the working reference to the output device */
	in_dev_put(out_dev);
	return err;
}

static inline int ip_mkroute_input_def(struct sk_buff *skb,
				       struct fib_result* res,
				       const struct flowi *fl,
				       struct in_device *in_dev,
				       u32 daddr, u32 saddr, u32 tos)
{
	struct rtable* rth = NULL;
	int err;
	unsigned hash;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
		fib_select_multipath(fl, res);
#endif

	/* create a routing cache entry */
	err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
	if (err)
		return err;
	atomic_set(&rth->u.dst.__refcnt, 1);

	/* put it into the cache */
	hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
	return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
}
static inline int ip_mkroute_input(struct sk_buff *skb,
				   struct fib_result* res,
				   const struct flowi *fl,
				   struct in_device *in_dev,
				   u32 daddr, u32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	struct rtable* rth = NULL;
	unsigned char hop, hopcount, lasthop;
	int err = -EINVAL;
	unsigned int hash;

	if (res->fi)
		hopcount = res->fi->fib_nhs;
	else
		hopcount = 1;

	lasthop = hopcount - 1;

	/* distinguish between multipath and singlepath */
	if (hopcount < 2)
		return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
					    saddr, tos);

	/* add all alternatives to the routing cache */
	for (hop = 0; hop < hopcount; hop++) {
		res->nh_sel = hop;

		/* create a routing cache entry */
		err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
				      &rth);
		if (err)
			return err;

		/* put it into the cache */
		hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
		err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
		if (err)
			return err;

		/* forward hop information to multipath impl. */
		multipath_set_nhinfo(rth,
				     FIB_RES_NETWORK(*res),
				     FIB_RES_NETMASK(*res),
				     res->prefixlen,
				     &FIB_RES_NH(*res));

		/* only for the last hop is the reference count handled
		 * outside
		 */
		if (hop == lasthop)
			atomic_set(&(skb->dst->__refcnt), 1);
	}
	return err;
#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
	return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
}


/*
 *	NOTE. We drop all packets that have a local source
 *	address, because every properly looped back packet
 *	must have the correct destination already attached by the output
 *	routine.
 *
 *	Such an approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with a 100% guarantee.
 */
static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = in_dev_get(dev);
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = daddr,
					.saddr = saddr,
					.tos = tos,
					.scope = RT_SCOPE_UNIVERSE,
#ifdef CONFIG_IP_ROUTE_FWMARK
					.fwmark = skb->nfmark
#endif
				      } },
			    .iif = dev->ifindex };
	unsigned	flags = 0;
	u32		itag = 0;
	struct rtable * rth;
	unsigned	hash;
	u32		spec_dst;
	int		err = -EINVAL;
	int		free_res = 0;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which cannot be detected
	   by fib_lookup.
	 */

	if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr))
		goto martian_source;

	if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I am not even sure whether this should be fixed.
	 * Waiting for complaints :-)
	 */
	if (ZERONET(saddr))
		goto martian_source;

	if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
		goto martian_destination;

	/*
	 *	Now we are ready to route the packet.
	 */
	if ((err = fib_lookup(&fl, &res)) != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			goto e_hostunreach;
		goto no_route;
	}
	free_res = 1;

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		int result;
		result = fib_validate_source(saddr, daddr, tos,
					     loopback_dev.ifindex,
					     dev, &spec_dst, &itag);
		if (result < 0)
			goto martian_source;
		if (result)
			flags |= RTCF_DIRECTSRC;
		spec_dst = daddr;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto e_hostunreach;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
	if (err == -ENOBUFS)
		goto e_nobufs;
	if (err == -EINVAL)
		goto e_inval;

done:
	in_dev_put(in_dev);
	if (free_res)
		fib_res_put(&res);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ZERONET(saddr))
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	else {
		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
					  &itag);
		if (err < 0)
			goto martian_source;
		if (err)
			flags |= RTCF_DIRECTSRC;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth)
		goto e_nobufs;

	rth->u.dst.output= ip_rt_bug;

	atomic_set(&rth->u.dst.__refcnt, 1);
	rth->u.dst.flags= DST_HOST;
	if (in_dev->cnf.no_policy)
		rth->u.dst.flags |= DST_NOPOLICY;
	rth->fl.fl4_dst	= daddr;
	rth->rt_dst	= daddr;
	rth->fl.fl4_tos	= tos;
#ifdef CONFIG_IP_ROUTE_FWMARK
	rth->fl.fl4_fwmark= skb->nfmark;
#endif
	rth->fl.fl4_src	= saddr;
	rth->rt_src	= saddr;
#ifdef CONFIG_NET_CLS_ROUTE
	rth->u.dst.tclassid = itag;
#endif
	rth->rt_iif	=
	rth->fl.iif	= dev->ifindex;
	rth->u.dst.dev	= &loopback_dev;
	dev_hold(rth->u.dst.dev);
	rth->idev	= in_dev_get(rth->u.dst.dev);
	rth->rt_gateway	= daddr;
	rth->rt_spec_dst= spec_dst;
	rth->u.dst.input= ip_local_deliver;
	rth->rt_flags	= flags|RTCF_LOCAL;
	if (res.type == RTN_UNREACHABLE) {
		rth->u.dst.input= ip_error;
		rth->u.dst.error= -err;
		rth->rt_flags	&= ~RTCF_LOCAL;
	}
	rth->rt_type	= res.type;
	hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos);
	err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
	goto done;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	res.type = RTN_UNREACHABLE;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
		printk(KERN_WARNING "martian destination %u.%u.%u.%u from "
			"%u.%u.%u.%u, dev %s\n",
			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
#endif

e_hostunreach:
	err = -EHOSTUNREACH;
	goto done;

e_inval:
	err = -EINVAL;
	goto done;

e_nobufs:
	err = -ENOBUFS;
	goto done;

martian_source:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto e_inval;
}
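/*
 * Fast path: a cache hit must match the full flow key -- daddr, saddr,
 * iif, oif == 0, fwmark (if configured) and TOS -- exactly; anything
 * else falls through to ip_route_input_slow() above.
 */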
int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
		   u8 tos, struct net_device *dev)
{
	struct rtable *rth;
	unsigned hash;
	int iif = dev->ifindex;

	tos &= IPTOS_RT_MASK;
	hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos);

	rcu_read_lock();
	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
	     rth = rcu_dereference(rth->u.rt_next)) {
		if (rth->fl.fl4_dst == daddr &&
		    rth->fl.fl4_src == saddr &&
		    rth->fl.iif == iif &&
		    rth->fl.oif == 0 &&
#ifdef CONFIG_IP_ROUTE_FWMARK
		    rth->fl.fl4_fwmark == skb->nfmark &&
#endif
		    rth->fl.fl4_tos == tos) {
			rth->u.dst.lastuse = jiffies;
			dst_hold(&rth->u.dst);
			rth->u.dst.__use++;
			RT_CACHE_STAT_INC(in_hit);
			rcu_read_unlock();
			skb->dst = (struct dst_entry *)rth;
			return 0;
		}
		RT_CACHE_STAT_INC(in_hlist_search);
	}
	rcu_read_unlock();

	/* Multicast recognition logic was moved from the route cache to
	   here. The problem was that too many Ethernet cards have
	   broken/missing hardware multicast filters :-( As a result, a
	   host on a multicast network acquires a lot of useless route
	   cache entries, a sort of SDR message from all over the world.
	   Now we try to get rid of them. Provided the software IP
	   multicast filter is organized reasonably (at least, hashed),
	   this does not result in a slowdown compared with route cache
	   reject entries.
	   Note that multicast routers are not affected, because a route
	   cache entry is created for them eventually.
	 */
	if (MULTICAST(daddr)) {
		struct in_device *in_dev;

		rcu_read_lock();
		if ((in_dev = __in_dev_get(dev)) != NULL) {
			int our = ip_check_mc(in_dev, daddr, saddr,
					      skb->nh.iph->protocol);
			if (our
#ifdef CONFIG_IP_MROUTE
			    || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
#endif
			    ) {
				rcu_read_unlock();
				return ip_route_input_mc(skb, daddr, saddr,
							 tos, dev, our);
			}
		}
		rcu_read_unlock();
		return -EINVAL;
	}
	return ip_route_input_slow(skb, daddr, saddr, tos, dev);
}

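/*
 * A condensed sketch of how the receive path drives this entry point
 * (modelled on ip_rcv_finish() in net/ipv4/ip_input.c; option handling
 * and error accounting are omitted here):
 *
 *	if (skb->dst == NULL &&
 *	    ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
 *			   skb->dev))
 *		goto drop;
 *	...
 *	return dst_input(skb);	- invokes rth->u.dst.input(), i.e.
 *				  ip_local_deliver(), the forwarding
 *				  path, or ip_mr_input()
 */
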
static inline int __mkroute_output(struct rtable **result,
				   struct fib_result *res,
				   const struct flowi *fl,
				   const struct flowi *oldflp,
				   struct net_device *dev_out,
				   unsigned flags)
{
	struct rtable *rth;
	struct in_device *in_dev;
	u32 tos = RT_FL_TOS(oldflp);
	int err = 0;

	if (LOOPBACK(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
		return -EINVAL;

	if (fl->fl4_dst == 0xFFFFFFFF)
		res->type = RTN_BROADCAST;
	else if (MULTICAST(fl->fl4_dst))
		res->type = RTN_MULTICAST;
	else if (BADCLASS(fl->fl4_dst) || ZERONET(fl->fl4_dst))
		return -EINVAL;

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	/* get work reference to inet device */
	in_dev = in_dev_get(dev_out);
	if (!in_dev)
		return -EINVAL;

	if (res->type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		if (res->fi) {
			fib_info_put(res->fi);
			res->fi = NULL;
		}
	} else if (res->type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
				 oldflp->proto))
			flags &= ~RTCF_LOCAL;
		/* If the multicast route does not exist, use the
		   default one, but do not gateway in this case.
		   Yes, it is a hack.
		 */
		if (res->fi && res->prefixlen < 4) {
			fib_info_put(res->fi);
			res->fi = NULL;
		}
	}

	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->u.dst.flags = DST_HOST;
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	if (res->fi) {
		rth->rt_multipath_alg = res->fi->fib_mp_alg;
		if (res->fi->fib_nhs > 1)
			rth->u.dst.flags |= DST_BALANCED;
	}
#endif
	if (in_dev->cnf.no_xfrm)
		rth->u.dst.flags |= DST_NOXFRM;
	if (in_dev->cnf.no_policy)
		rth->u.dst.flags |= DST_NOPOLICY;

	rth->fl.fl4_dst	= oldflp->fl4_dst;
	rth->fl.fl4_tos	= tos;
	rth->fl.fl4_src	= oldflp->fl4_src;
	rth->fl.oif	= oldflp->oif;
#ifdef CONFIG_IP_ROUTE_FWMARK
	rth->fl.fl4_fwmark = oldflp->fl4_fwmark;
#endif
	rth->rt_dst	= fl->fl4_dst;
	rth->rt_src	= fl->fl4_src;
	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
	/* get references to the devices that are to be held by the
	   routing cache entry */
	rth->u.dst.dev	= dev_out;
	dev_hold(dev_out);
	rth->idev	= in_dev_get(dev_out);
	rth->rt_gateway	= fl->fl4_dst;
	rth->rt_spec_dst = fl->fl4_src;

	rth->u.dst.output = ip_output;

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL) {
		rth->u.dst.input = ip_local_deliver;
		rth->rt_spec_dst = fl->fl4_dst;
	}
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		rth->rt_spec_dst = fl->fl4_src;
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->u.dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (res->type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !LOCAL_MCAST(oldflp->fl4_dst)) {
				rth->u.dst.input = ip_mr_input;
				rth->u.dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, res, 0);

	rth->rt_flags = flags;

	*result = rth;
cleanup:
	/* release work reference to inet device */
	in_dev_put(in_dev);

	return err;
}

static inline int ip_mkroute_output_def(struct rtable **rp,
					struct fib_result *res,
					const struct flowi *fl,
					const struct flowi *oldflp,
					struct net_device *dev_out,
					unsigned flags)
{
	struct rtable *rth = NULL;
	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
	unsigned hash;
	if (err == 0) {
		u32 tos = RT_FL_TOS(oldflp);

		atomic_set(&rth->u.dst.__refcnt, 1);

		hash = rt_hash_code(oldflp->fl4_dst,
				    oldflp->fl4_src ^ (oldflp->oif << 5), tos);
		err = rt_intern_hash(hash, rth, rp);
	}

	return err;
}

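/*
 * Reminder of the rt_intern_hash() contract relied upon above and below
 * (a sketch, not new mechanism):
 *
 *	err = rt_intern_hash(hash, rth, rp);
 *
 * On success, *rp holds a referenced cache entry -- either rth itself,
 * or an equivalent entry that already sat in the chain, in which case
 * the candidate is released. Callers must therefore continue only via
 * *rp afterwards.
 */
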
static inline int ip_mkroute_output(struct rtable **rp,
				    struct fib_result *res,
				    const struct flowi *fl,
				    const struct flowi *oldflp,
				    struct net_device *dev_out,
				    unsigned flags)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	u32 tos = RT_FL_TOS(oldflp);
	unsigned char hop;
	unsigned hash;
	int err = -EINVAL;
	struct rtable *rth = NULL;

	if (res->fi && res->fi->fib_nhs > 1) {
		unsigned char hopcount = res->fi->fib_nhs;

		for (hop = 0; hop < hopcount; hop++) {
			struct net_device *dev2nexthop;

			res->nh_sel = hop;

			/* hold a work reference to the output device */
			dev2nexthop = FIB_RES_DEV(*res);
			dev_hold(dev2nexthop);

			err = __mkroute_output(&rth, res, fl, oldflp,
					       dev2nexthop, flags);

			if (err != 0)
				goto cleanup;

			hash = rt_hash_code(oldflp->fl4_dst,
					    oldflp->fl4_src ^
					    (oldflp->oif << 5), tos);
			err = rt_intern_hash(hash, rth, rp);

			/* forward hop information to multipath impl. */
			multipath_set_nhinfo(rth,
					     FIB_RES_NETWORK(*res),
					     FIB_RES_NETMASK(*res),
					     res->prefixlen,
					     &FIB_RES_NH(*res));
cleanup:
			/* release work reference to output device */
			dev_put(dev2nexthop);

			if (err != 0)
				return err;
		}
		atomic_set(&(*rp)->u.dst.__refcnt, 1);
		return err;
	} else {
		return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
					     flags);
	}
#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
	return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags);
#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
}

/*
 * Major route resolver routine.
 */

static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
{
	u32 tos	= RT_FL_TOS(oldflp);
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = oldflp->fl4_dst,
					.saddr = oldflp->fl4_src,
					.tos = tos & IPTOS_RT_MASK,
					.scope = ((tos & RTO_ONLINK) ?
						  RT_SCOPE_LINK :
						  RT_SCOPE_UNIVERSE),
#ifdef CONFIG_IP_ROUTE_FWMARK
					.fwmark = oldflp->fl4_fwmark
#endif
				      } },
			    .iif = loopback_dev.ifindex,
			    .oif = oldflp->oif };
	struct fib_result res;
	unsigned flags = 0;
	struct net_device *dev_out = NULL;
	int free_res = 0;
	int err;

	res.fi = NULL;
#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	if (oldflp->fl4_src) {
		err = -EINVAL;
		if (MULTICAST(oldflp->fl4_src) ||
		    BADCLASS(oldflp->fl4_src) ||
		    ZERONET(oldflp->fl4_src))
			goto out;

		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
		dev_out = ip_dev_find(oldflp->fl4_src);
		if (dev_out == NULL)
			goto out;

		/* I removed the check for oif == dev_out->oif here.
		   It was wrong for two reasons:
		   1. ip_dev_find(saddr) can return the wrong iface, if
		      saddr is assigned to multiple interfaces.
		   2. Moreover, we are allowed to send packets with saddr
		      of another iface. --ANK
		 */

		if (oldflp->oif == 0
		    && (MULTICAST(oldflp->fl4_dst) ||
			oldflp->fl4_dst == 0xFFFFFFFF)) {
			/* Special hack: user can direct multicasts
			   and limited broadcast via the necessary interface
			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
			   This hack is not just for fun, it allows
			   vic, vat and friends to work.
			   They bind the socket to loopback, set ttl to zero
			   and expect that it will work.
			   From the viewpoint of the routing cache they are
			   broken, because we are not allowed to build a
			   multicast path with a loopback source addr (look,
			   the routing cache cannot know that ttl is zero, so
			   that the packet will not leave this host and the
			   route is valid).
			   Luckily, this hack is a good workaround.
			 */

			fl.oif = dev_out->ifindex;
			goto make_route;
		}
		if (dev_out)
			dev_put(dev_out);
		dev_out = NULL;
	}

	if (oldflp->oif) {
		dev_out = dev_get_by_index(oldflp->oif);
		err = -ENODEV;
		if (dev_out == NULL)
			goto out;
		if (__in_dev_get(dev_out) == NULL) {
			dev_put(dev_out);
			goto out;	/* Wrong error code */
		}

		if (LOCAL_MCAST(oldflp->fl4_dst) ||
		    oldflp->fl4_dst == 0xFFFFFFFF) {
			if (!fl.fl4_src)
				fl.fl4_src = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl.fl4_src) {
			if (MULTICAST(oldflp->fl4_dst))
				fl.fl4_src = inet_select_addr(dev_out, 0,
							      fl.fl4_scope);
			else if (!oldflp->fl4_dst)
				fl.fl4_src = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	if (!fl.fl4_dst) {
		fl.fl4_dst = fl.fl4_src;
		if (!fl.fl4_dst)
			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
		if (dev_out)
			dev_put(dev_out);
		dev_out = &loopback_dev;
		dev_hold(dev_out);
		fl.oif = loopback_dev.ifindex;
		res.type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	if (fib_lookup(&fl, &res)) {
		res.fi = NULL;
		if (oldflp->oif) {
			/* Apparently, routing tables are wrong. Assume
			   that the destination is on link.

			   WHY? DW.
			   Because we are allowed to send to an iface
			   even if it has NO routes and NO assigned
			   addresses. When oif is specified, routing
			   tables are looked up with only one purpose:
			   to catch if the destination is gatewayed,
			   rather than direct. Moreover, if MSG_DONTROUTE
			   is set, we send the packet ignoring both routing
			   tables and ifaddr state. --ANK

			   We could make it even if oif is unknown,
			   likely IPv6, but we do not.
			 */

			if (fl.fl4_src == 0)
				fl.fl4_src = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res.type = RTN_UNICAST;
			goto make_route;
		}
		if (dev_out)
			dev_put(dev_out);
		err = -ENETUNREACH;
		goto out;
	}
	free_res = 1;

	if (res.type == RTN_LOCAL) {
		if (!fl.fl4_src)
			fl.fl4_src = fl.fl4_dst;
		if (dev_out)
			dev_put(dev_out);
		dev_out = &loopback_dev;
		dev_hold(dev_out);
		fl.oif = dev_out->ifindex;
		if (res.fi)
			fib_info_put(res.fi);
		res.fi = NULL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res.fi->fib_nhs > 1 && fl.oif == 0)
		fib_select_multipath(&fl, &res);
	else
#endif
	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
		fib_select_default(&fl, &res);

	if (!fl.fl4_src)
		fl.fl4_src = FIB_RES_PREFSRC(res);

	if (dev_out)
		dev_put(dev_out);
	dev_out = FIB_RES_DEV(res);
	dev_hold(dev_out);
	fl.oif = dev_out->ifindex;

make_route:
	err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);

	if (free_res)
		fib_res_put(&res);
	if (dev_out)
		dev_put(dev_out);
out:	return err;
}

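/*
 * Note on RTO_ONLINK (an illustrative sketch of a pattern used by
 * callers such as net/ipv4/arp.c, not new mechanism): a caller that
 * wants a strictly on-link result, ignoring any gateway, sets the bit
 * in the tos key:
 *
 *	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
 *						 .tos = RTO_ONLINK } } };
 *	struct rtable *rt;
 *	int err = ip_route_output_key(&rt, &fl);
 *
 * RT_FL_TOS() preserves the bit when the lookup key is formed, and the
 * scope computed above becomes RT_SCOPE_LINK.
 */
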
int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
{
	unsigned hash;
	struct rtable *rth;

	hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5),
			    flp->fl4_tos);

	rcu_read_lock_bh();
	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
	     rth = rcu_dereference(rth->u.rt_next)) {
		if (rth->fl.fl4_dst == flp->fl4_dst &&
		    rth->fl.fl4_src == flp->fl4_src &&
		    rth->fl.iif == 0 &&
		    rth->fl.oif == flp->oif &&
#ifdef CONFIG_IP_ROUTE_FWMARK
		    rth->fl.fl4_fwmark == flp->fl4_fwmark &&
#endif
		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
		      (IPTOS_RT_MASK | RTO_ONLINK))) {

			/* check for multipath routes and choose one if
			 * necessary
			 */
			if (multipath_select_route(flp, rth, rp)) {
				dst_hold(&(*rp)->u.dst);
				RT_CACHE_STAT_INC(out_hit);
				rcu_read_unlock_bh();
				return 0;
			}

			rth->u.dst.lastuse = jiffies;
			dst_hold(&rth->u.dst);
			rth->u.dst.__use++;
			RT_CACHE_STAT_INC(out_hit);
			rcu_read_unlock_bh();
			*rp = rth;
			return 0;
		}
		RT_CACHE_STAT_INC(out_hlist_search);
	}
	rcu_read_unlock_bh();

	return ip_route_output_slow(rp, flp);
}

int ip_route_output_flow(struct rtable **rp, struct flowi *flp,
			 struct sock *sk, int flags)
{
	int err;

	if ((err = __ip_route_output_key(rp, flp)) != 0)
		return err;

	if (flp->proto) {
		if (!flp->fl4_src)
			flp->fl4_src = (*rp)->rt_src;
		if (!flp->fl4_dst)
			flp->fl4_dst = (*rp)->rt_dst;
		return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
	}

	return 0;
}

int ip_route_output_key(struct rtable **rp, struct flowi *flp)
{
	return ip_route_output_flow(rp, flp, NULL, 0);
}

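/*
 * A minimal usage sketch for the wrappers above (identifiers are local
 * to this example; error handling trimmed):
 *
 *	struct flowi fl = { .oif = 0,
 *			    .nl_u = { .ip4_u = { .daddr = daddr,
 *						 .saddr = 0,
 *						 .tos = tos } },
 *			    .proto = IPPROTO_UDP };
 *	struct rtable *rt;
 *
 *	if (ip_route_output_key(&rt, &fl))
 *		return -EHOSTUNREACH;	(a typical caller reaction)
 *	... transmit via rt->u.dst ...
 *	ip_rt_put(rt);			(drops the reference taken above)
 */
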
static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
			int nowait, unsigned int flags)
{
	struct rtable *rt = (struct rtable *)skb->dst;
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;
	struct rta_cacheinfo ci;
#ifdef CONFIG_IP_MROUTE
	struct rtattr *eptr;
#endif
	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
	r = NLMSG_DATA(nlh);
	r->rtm_family	= AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= rt->fl.fl4_tos;
	r->rtm_table	= RT_TABLE_MAIN;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;
	RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
	if (rt->fl.fl4_src) {
		r->rtm_src_len = 32;
		RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
	}
	if (rt->u.dst.dev)
		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
#ifdef CONFIG_NET_CLS_ROUTE
	if (rt->u.dst.tclassid)
		RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	if (rt->rt_multipath_alg != IP_MP_ALG_NONE) {
		__u32 alg = rt->rt_multipath_alg;

		RTA_PUT(skb, RTA_MP_ALGO, 4, &alg);
	}
#endif
	if (rt->fl.iif)
		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
	else if (rt->rt_src != rt->fl.fl4_src)
		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
	if (rt->rt_dst != rt->rt_gateway)
		RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto rtattr_failure;
	ci.rta_lastuse	= jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
	ci.rta_used	= rt->u.dst.__use;
	ci.rta_clntref	= atomic_read(&rt->u.dst.__refcnt);
	if (rt->u.dst.expires)
		ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires -
						    jiffies);
	else
		ci.rta_expires = 0;
	ci.rta_error	= rt->u.dst.error;
	ci.rta_id	= ci.rta_ts = ci.rta_tsage = 0;
	if (rt->peer) {
		ci.rta_id = rt->peer->ip_id_count;
		if (rt->peer->tcp_ts_stamp) {
			ci.rta_ts = rt->peer->tcp_ts;
			ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
		}
	}
#ifdef CONFIG_IP_MROUTE
	eptr = (struct rtattr *)skb->tail;
#endif
	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	if (rt->fl.iif) {
#ifdef CONFIG_IP_MROUTE
		u32 dst = rt->rt_dst;

		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
		    ipv4_devconf.mc_forwarding) {
			int err = ipmr_get_route(skb, r, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nlmsg_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nlmsg_failure;
					((struct rta_cacheinfo *)RTA_DATA(eptr))->rta_error = err;
				}
			}
		} else
#endif
			RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
	}

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

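/*
 * For reference: RTA_PUT() (include/linux/rtnetlink.h) appends a single
 * rtattr to the skb and jumps to the local rtattr_failure label when the
 * skb runs out of tailroom, which is why that label must exist in every
 * user, as above. A unicast reply built here is laid out roughly as:
 *
 *	nlmsghdr | rtmsg | RTA_DST [RTA_SRC] RTA_OIF ... RTA_CACHEINFO [RTA_IIF]
 */
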
int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
{
	struct rtattr **rta = arg;
	struct rtmsg *rtm = NLMSG_DATA(nlh);
	struct rtable *rt = NULL;
	u32 dst = 0;
	u32 src = 0;
	int iif = 0;
	int err = -ENOBUFS;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		goto out;

	/* Reserve room for dummy headers; this skb can pass
	   through a good chunk of the routing engine.
	 */
	skb->mac.raw = skb->data;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	if (rta[RTA_SRC - 1])
		memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4);
	if (rta[RTA_DST - 1])
		memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4);
	if (rta[RTA_IIF - 1])
		memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int));

	if (iif) {
		struct net_device *dev = __dev_get_by_index(iif);
		err = -ENODEV;
		if (!dev)
			goto out_free;
		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();
		rt = (struct rtable *)skb->dst;
		if (!err && rt->u.dst.error)
			err = -rt->u.dst.error;
	} else {
		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
							 .saddr = src,
							 .tos = rtm->rtm_tos } } };
		int oif = 0;
		if (rta[RTA_OIF - 1])
			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
		fl.oif = oif;
		err = ip_route_output_key(&rt, &fl);
	}
	if (err)
		goto out_free;

	skb->dst = &rt->u.dst;
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;

	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (!err)
		goto out_free;
	if (err < 0) {
		err = -EMSGSIZE;
		goto out_free;
	}

	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid,
			      MSG_DONTWAIT);
	if (err > 0)
		err = 0;
out:	return err;

out_free:
	kfree_skb(skb);
	goto out;
}

int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rtable *rt;
	int h, s_h;
	int idx, s_idx;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	for (h = 0; h <= rt_hash_mask; h++) {
		if (h < s_h)
			continue;
		if (h > s_h)
			s_idx = 0;
		rcu_read_lock_bh();
		for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
		     rt = rcu_dereference(rt->u.rt_next), idx++) {
			if (idx < s_idx)
				continue;
			skb->dst = dst_clone(&rt->u.dst);
			if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
					 1, NLM_F_MULTI) <= 0) {
				dst_release(xchg(&skb->dst, NULL));
				rcu_read_unlock_bh();
				goto done;
			}
			dst_release(xchg(&skb->dst, NULL));
		}
		rcu_read_unlock_bh();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	return skb->len;
}

void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(0);
}

#ifdef CONFIG_SYSCTL
static int flush_delay;

static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
				     struct file *filp, void __user *buffer,
				     size_t *lenp, loff_t *ppos)
{
	if (write) {
		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
		rt_cache_flush(flush_delay);
		return 0;
	}

	return -EINVAL;
}

static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
					      int __user *name,
					      int nlen,
					      void __user *oldval,
					      size_t __user *oldlenp,
					      void __user *newval,
					      size_t newlen,
					      void **context)
{
	int delay;
	if (newlen != sizeof(int))
		return -EINVAL;
	if (get_user(delay, (int __user *)newval))
		return -EFAULT;
	rt_cache_flush(delay);
	return 0;
}

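/*
 * Usage note (illustrative): the two handlers above back the "flush"
 * entry registered in the table below. A write hands the parsed value
 * to rt_cache_flush() as the delay; a non-positive value flushes right
 * away, e.g. via procfs:
 *
 *	echo 0 > /proc/sys/net/ipv4/route/flush
 *
 * proc_dointvec() parses the value into flush_delay before
 * rt_cache_flush(flush_delay) either runs the flush or arms
 * rt_flush_timer.
 */
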
ctl_table ipv4_route_table[] = {
	{
		.ctl_name	= NET_IPV4_ROUTE_FLUSH,
		.procname	= "flush",
		.data		= &flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= &ipv4_sysctl_rtcache_flush,
		.strategy	= &ipv4_sysctl_rtcache_flush_strategy,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_MIN_DELAY,
		.procname	= "min_delay",
		.data		= &ip_rt_min_delay,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_MAX_DELAY,
		.procname	= "max_delay",
		.data		= &ip_rt_max_delay,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_GC_THRESH,
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_MAX_SIZE,
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */
		.ctl_name	= NET_IPV4_ROUTE_GC_MIN_INTERVAL,
		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS,
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_ms_jiffies,
		.strategy	= &sysctl_ms_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_GC_TIMEOUT,
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_GC_INTERVAL,
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_REDIRECT_LOAD,
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_REDIRECT_NUMBER,
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_REDIRECT_SILENCE,
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_ERROR_COST,
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_ERROR_BURST,
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_GC_ELASTICITY,
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_MTU_EXPIRES,
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_MIN_PMTU,
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_MIN_ADVMSS,
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_ROUTE_SECRET_INTERVAL,
		.procname	= "secret_interval",
		.data		= &ip_rt_secret_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{ .ctl_name = 0 }
};
#endif

#ifdef CONFIG_NET_CLS_ROUTE
struct ip_rt_acct *ip_rt_acct;

/* This code sucks. But you should have seen it before! --RR */

/* IP route accounting ptr for this logical cpu number. */
#define IP_RT_ACCT_CPU(i) (ip_rt_acct + i * 256)

#ifdef CONFIG_PROC_FS
static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
			   int length, int *eof, void *data)
{
	unsigned int i;

	if ((offset & 3) || (length & 3))
		return -EIO;

	if (offset >= sizeof(struct ip_rt_acct) * 256) {
		*eof = 1;
		return 0;
	}

	if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
		length = sizeof(struct ip_rt_acct) * 256 - offset;
		*eof = 1;
	}

	offset /= sizeof(u32);

	if (length > 0) {
		u32 *src = ((u32 *) IP_RT_ACCT_CPU(0)) + offset;
		u32 *dst = (u32 *) buffer;

		/* Copy first cpu. */
		*start = buffer;
		memcpy(dst, src, length);

		/* Add the other cpus in, one int at a time */
		for_each_cpu(i) {
			unsigned int j;

			src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;

			for (j = 0; j < length/4; j++)
				dst[j] += src[j];
		}
	}
	return length;
}
#endif /* CONFIG_PROC_FS */
#endif /* CONFIG_NET_CLS_ROUTE */

static __initdata unsigned long rhash_entries;
static int __init set_rhash_entries(char *str)
{
	if (!str)
		return 0;
	rhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("rhash_entries=", set_rhash_entries);

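/*
 * Usage note (illustrative): the IP route cache hash can be sized
 * explicitly at boot, overriding the memory-based default computed in
 * ip_rt_init() below:
 *
 *	linux ... rhash_entries=262144
 *
 * simple_strtoul() with base 0 accepts decimal, octal (leading 0) or
 * hex (leading 0x).
 */
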
int __init ip_rt_init(void)
{
	int rc = 0;

	rt_hash_rnd = (int) ((num_physpages ^ (num_physpages >> 8)) ^
			     (jiffies ^ (jiffies >> 7)));

#ifdef CONFIG_NET_CLS_ROUTE
	{
	int order;
	for (order = 0;
	     (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS;
	     order++)
		/* NOTHING */;
	ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order);
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
	memset(ip_rt_acct, 0, PAGE_SIZE << order);
	}
#endif

	ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
						     sizeof(struct rtable),
						     0, SLAB_HWCACHE_ALIGN,
						     NULL, NULL);

	if (!ipv4_dst_ops.kmem_cachep)
		panic("IP: failed to allocate ip_dst_cache\n");

	rt_hash_table = (struct rt_hash_bucket *)
		alloc_large_system_hash("IP route cache",
					sizeof(struct rt_hash_bucket),
					rhash_entries,
					(num_physpages >= 128 * 1024) ?
						(27 - PAGE_SHIFT) :
						(29 - PAGE_SHIFT),
					HASH_HIGHMEM,
					&rt_hash_log,
					&rt_hash_mask,
					0);
	memset(rt_hash_table, 0,
	       (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
	rt_hash_lock_init();

	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
	ip_rt_max_size = (rt_hash_mask + 1) * 16;

	rt_cache_stat = alloc_percpu(struct rt_cache_stat);
	if (!rt_cache_stat)
		return -ENOMEM;

	devinet_init();
	ip_fib_init();

	init_timer(&rt_flush_timer);
	rt_flush_timer.function = rt_run_flush;
	init_timer(&rt_periodic_timer);
	rt_periodic_timer.function = rt_check_expire;
	init_timer(&rt_secret_timer);
	rt_secret_timer.function = rt_secret_rebuild;

	/* All the timers started at system startup tend
	   to synchronize. Perturb them a bit.
	 */
	rt_periodic_timer.expires = jiffies + net_random() % ip_rt_gc_interval +
					ip_rt_gc_interval;
	add_timer(&rt_periodic_timer);

	rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
					ip_rt_secret_interval;
	add_timer(&rt_secret_timer);

#ifdef CONFIG_PROC_FS
	{
	struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
	if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
					     proc_net_stat))) {
		free_percpu(rt_cache_stat);
		return -ENOMEM;
	}
	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
	}
#ifdef CONFIG_NET_CLS_ROUTE
	create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
#endif
#endif
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	return rc;
}

EXPORT_SYMBOL(__ip_select_ident);
EXPORT_SYMBOL(ip_route_input);
EXPORT_SYMBOL(ip_route_output_key);