1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * ROUTE - implementation of the IP router. 7 * 8 * Version: $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $ 9 * 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Alan Cox, <gw4pts@gw4pts.ampr.org> 13 * Linus Torvalds, <Linus.Torvalds@helsinki.fi> 14 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 15 * 16 * Fixes: 17 * Alan Cox : Verify area fixes. 18 * Alan Cox : cli() protects routing changes 19 * Rui Oliveira : ICMP routing table updates 20 * (rco@di.uminho.pt) Routing table insertion and update 21 * Linus Torvalds : Rewrote bits to be sensible 22 * Alan Cox : Added BSD route gw semantics 23 * Alan Cox : Super /proc >4K 24 * Alan Cox : MTU in route table 25 * Alan Cox : MSS actually. Also added the window 26 * clamper. 27 * Sam Lantinga : Fixed route matching in rt_del() 28 * Alan Cox : Routing cache support. 29 * Alan Cox : Removed compatibility cruft. 30 * Alan Cox : RTF_REJECT support. 31 * Alan Cox : TCP irtt support. 32 * Jonathan Naylor : Added Metric support. 33 * Miquel van Smoorenburg : BSD API fixes. 34 * Miquel van Smoorenburg : Metrics. 35 * Alan Cox : Use __u32 properly 36 * Alan Cox : Aligned routing errors more closely with BSD 37 * our system is still very different. 38 * Alan Cox : Faster /proc handling 39 * Alexey Kuznetsov : Massive rework to support tree based routing, 40 * routing caches and better behaviour. 41 * 42 * Olaf Erb : irtt wasn't being copied right. 43 * Bjorn Ekwall : Kerneld route support. 44 * Alan Cox : Multicast fixed (I hope) 45 * Pavel Krauz : Limited broadcast fixed 46 * Mike McLagan : Routing by source 47 * Alexey Kuznetsov : End of old history. Split to fib.c and 48 * route.c and rewritten from scratch. 49 * Andi Kleen : Load-limit warning messages. 50 * Vitaly E. Lavrov : Transparent proxy revived after year coma. 51 * Vitaly E. Lavrov : Race condition in ip_route_input_slow. 52 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow. 53 * Vladimir V. Ivanov : IP rule info (flowid) is really useful. 54 * Marc Boucher : routing by fwmark 55 * Robert Olsson : Added rt_cache statistics 56 * Arnaldo C. Melo : Convert proc stuff to seq_file 57 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. 58 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect 59 * Ilia Sotnikov : Removed TOS from hash calculations 60 * 61 * This program is free software; you can redistribute it and/or 62 * modify it under the terms of the GNU General Public License 63 * as published by the Free Software Foundation; either version 64 * 2 of the License, or (at your option) any later version. 65 */ 66 67 #include <linux/module.h> 68 #include <asm/uaccess.h> 69 #include <asm/system.h> 70 #include <linux/bitops.h> 71 #include <linux/types.h> 72 #include <linux/kernel.h> 73 #include <linux/sched.h> 74 #include <linux/mm.h> 75 #include <linux/bootmem.h> 76 #include <linux/string.h> 77 #include <linux/socket.h> 78 #include <linux/sockios.h> 79 #include <linux/errno.h> 80 #include <linux/in.h> 81 #include <linux/inet.h> 82 #include <linux/netdevice.h> 83 #include <linux/proc_fs.h> 84 #include <linux/init.h> 85 #include <linux/skbuff.h> 86 #include <linux/rtnetlink.h> 87 #include <linux/inetdevice.h> 88 #include <linux/igmp.h> 89 #include <linux/pkt_sched.h> 90 #include <linux/mroute.h> 91 #include <linux/netfilter_ipv4.h> 92 #include <linux/random.h> 93 #include <linux/jhash.h> 94 #include <linux/rcupdate.h> 95 #include <linux/times.h> 96 #include <net/protocol.h> 97 #include <net/ip.h> 98 #include <net/route.h> 99 #include <net/inetpeer.h> 100 #include <net/sock.h> 101 #include <net/ip_fib.h> 102 #include <net/arp.h> 103 #include <net/tcp.h> 104 #include <net/icmp.h> 105 #include <net/xfrm.h> 106 #include <net/ip_mp_alg.h> 107 #ifdef CONFIG_SYSCTL 108 #include <linux/sysctl.h> 109 #endif 110 111 #define RT_FL_TOS(oldflp) \ 112 ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) 113 114 #define IP_MAX_MTU 0xFFF0 115 116 #define RT_GC_TIMEOUT (300*HZ) 117 118 static int ip_rt_min_delay = 2 * HZ; 119 static int ip_rt_max_delay = 10 * HZ; 120 static int ip_rt_max_size; 121 static int ip_rt_gc_timeout = RT_GC_TIMEOUT; 122 static int ip_rt_gc_interval = 60 * HZ; 123 static int ip_rt_gc_min_interval = HZ / 2; 124 static int ip_rt_redirect_number = 9; 125 static int ip_rt_redirect_load = HZ / 50; 126 static int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1)); 127 static int ip_rt_error_cost = HZ; 128 static int ip_rt_error_burst = 5 * HZ; 129 static int ip_rt_gc_elasticity = 8; 130 static int ip_rt_mtu_expires = 10 * 60 * HZ; 131 static int ip_rt_min_pmtu = 512 + 20 + 20; 132 static int ip_rt_min_advmss = 256; 133 static int ip_rt_secret_interval = 10 * 60 * HZ; 134 static unsigned long rt_deadline; 135 136 #define RTprint(a...) printk(KERN_DEBUG a) 137 138 static struct timer_list rt_flush_timer; 139 static struct timer_list rt_periodic_timer; 140 static struct timer_list rt_secret_timer; 141 142 /* 143 * Interface to generic destination cache. 144 */ 145 146 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); 147 static void ipv4_dst_destroy(struct dst_entry *dst); 148 static void ipv4_dst_ifdown(struct dst_entry *dst, 149 struct net_device *dev, int how); 150 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); 151 static void ipv4_link_failure(struct sk_buff *skb); 152 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 153 static int rt_garbage_collect(void); 154 155 156 static struct dst_ops ipv4_dst_ops = { 157 .family = AF_INET, 158 .protocol = __constant_htons(ETH_P_IP), 159 .gc = rt_garbage_collect, 160 .check = ipv4_dst_check, 161 .destroy = ipv4_dst_destroy, 162 .ifdown = ipv4_dst_ifdown, 163 .negative_advice = ipv4_negative_advice, 164 .link_failure = ipv4_link_failure, 165 .update_pmtu = ip_rt_update_pmtu, 166 .entry_size = sizeof(struct rtable), 167 }; 168 169 #define ECN_OR_COST(class) TC_PRIO_##class 170 171 __u8 ip_tos2prio[16] = { 172 TC_PRIO_BESTEFFORT, 173 ECN_OR_COST(FILLER), 174 TC_PRIO_BESTEFFORT, 175 ECN_OR_COST(BESTEFFORT), 176 TC_PRIO_BULK, 177 ECN_OR_COST(BULK), 178 TC_PRIO_BULK, 179 ECN_OR_COST(BULK), 180 TC_PRIO_INTERACTIVE, 181 ECN_OR_COST(INTERACTIVE), 182 TC_PRIO_INTERACTIVE, 183 ECN_OR_COST(INTERACTIVE), 184 TC_PRIO_INTERACTIVE_BULK, 185 ECN_OR_COST(INTERACTIVE_BULK), 186 TC_PRIO_INTERACTIVE_BULK, 187 ECN_OR_COST(INTERACTIVE_BULK) 188 }; 189 190 191 /* 192 * Route cache. 193 */ 194 195 /* The locking scheme is rather straight forward: 196 * 197 * 1) Read-Copy Update protects the buckets of the central route hash. 198 * 2) Only writers remove entries, and they hold the lock 199 * as they look at rtable reference counts. 200 * 3) Only readers acquire references to rtable entries, 201 * they do so with atomic increments and with the 202 * lock held. 203 */ 204 205 struct rt_hash_bucket { 206 struct rtable *chain; 207 }; 208 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ 209 defined(CONFIG_PROVE_LOCKING) 210 /* 211 * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks 212 * The size of this table is a power of two and depends on the number of CPUS. 213 * (on lockdep we have a quite big spinlock_t, so keep the size down there) 214 */ 215 #ifdef CONFIG_LOCKDEP 216 # define RT_HASH_LOCK_SZ 256 217 #else 218 # if NR_CPUS >= 32 219 # define RT_HASH_LOCK_SZ 4096 220 # elif NR_CPUS >= 16 221 # define RT_HASH_LOCK_SZ 2048 222 # elif NR_CPUS >= 8 223 # define RT_HASH_LOCK_SZ 1024 224 # elif NR_CPUS >= 4 225 # define RT_HASH_LOCK_SZ 512 226 # else 227 # define RT_HASH_LOCK_SZ 256 228 # endif 229 #endif 230 231 static spinlock_t *rt_hash_locks; 232 # define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)] 233 # define rt_hash_lock_init() { \ 234 int i; \ 235 rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \ 236 if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \ 237 for (i = 0; i < RT_HASH_LOCK_SZ; i++) \ 238 spin_lock_init(&rt_hash_locks[i]); \ 239 } 240 #else 241 # define rt_hash_lock_addr(slot) NULL 242 # define rt_hash_lock_init() 243 #endif 244 245 static struct rt_hash_bucket *rt_hash_table; 246 static unsigned rt_hash_mask; 247 static int rt_hash_log; 248 static unsigned int rt_hash_rnd; 249 250 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 251 #define RT_CACHE_STAT_INC(field) \ 252 (__raw_get_cpu_var(rt_cache_stat).field++) 253 254 static int rt_intern_hash(unsigned hash, struct rtable *rth, 255 struct rtable **res); 256 257 static unsigned int rt_hash_code(u32 daddr, u32 saddr) 258 { 259 return (jhash_2words(daddr, saddr, rt_hash_rnd) 260 & rt_hash_mask); 261 } 262 263 #ifdef CONFIG_PROC_FS 264 struct rt_cache_iter_state { 265 int bucket; 266 }; 267 268 static struct rtable *rt_cache_get_first(struct seq_file *seq) 269 { 270 struct rtable *r = NULL; 271 struct rt_cache_iter_state *st = seq->private; 272 273 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { 274 rcu_read_lock_bh(); 275 r = rt_hash_table[st->bucket].chain; 276 if (r) 277 break; 278 rcu_read_unlock_bh(); 279 } 280 return r; 281 } 282 283 static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) 284 { 285 struct rt_cache_iter_state *st = rcu_dereference(seq->private); 286 287 r = r->u.rt_next; 288 while (!r) { 289 rcu_read_unlock_bh(); 290 if (--st->bucket < 0) 291 break; 292 rcu_read_lock_bh(); 293 r = rt_hash_table[st->bucket].chain; 294 } 295 return r; 296 } 297 298 static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) 299 { 300 struct rtable *r = rt_cache_get_first(seq); 301 302 if (r) 303 while (pos && (r = rt_cache_get_next(seq, r))) 304 --pos; 305 return pos ? NULL : r; 306 } 307 308 static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 309 { 310 return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 311 } 312 313 static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) 314 { 315 struct rtable *r = NULL; 316 317 if (v == SEQ_START_TOKEN) 318 r = rt_cache_get_first(seq); 319 else 320 r = rt_cache_get_next(seq, v); 321 ++*pos; 322 return r; 323 } 324 325 static void rt_cache_seq_stop(struct seq_file *seq, void *v) 326 { 327 if (v && v != SEQ_START_TOKEN) 328 rcu_read_unlock_bh(); 329 } 330 331 static int rt_cache_seq_show(struct seq_file *seq, void *v) 332 { 333 if (v == SEQ_START_TOKEN) 334 seq_printf(seq, "%-127s\n", 335 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" 336 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" 337 "HHUptod\tSpecDst"); 338 else { 339 struct rtable *r = v; 340 char temp[256]; 341 342 sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" 343 "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X", 344 r->u.dst.dev ? r->u.dst.dev->name : "*", 345 (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, 346 r->rt_flags, atomic_read(&r->u.dst.__refcnt), 347 r->u.dst.__use, 0, (unsigned long)r->rt_src, 348 (dst_metric(&r->u.dst, RTAX_ADVMSS) ? 349 (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), 350 dst_metric(&r->u.dst, RTAX_WINDOW), 351 (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + 352 dst_metric(&r->u.dst, RTAX_RTTVAR)), 353 r->fl.fl4_tos, 354 r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, 355 r->u.dst.hh ? (r->u.dst.hh->hh_output == 356 dev_queue_xmit) : 0, 357 r->rt_spec_dst); 358 seq_printf(seq, "%-127s\n", temp); 359 } 360 return 0; 361 } 362 363 static struct seq_operations rt_cache_seq_ops = { 364 .start = rt_cache_seq_start, 365 .next = rt_cache_seq_next, 366 .stop = rt_cache_seq_stop, 367 .show = rt_cache_seq_show, 368 }; 369 370 static int rt_cache_seq_open(struct inode *inode, struct file *file) 371 { 372 struct seq_file *seq; 373 int rc = -ENOMEM; 374 struct rt_cache_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); 375 376 if (!s) 377 goto out; 378 rc = seq_open(file, &rt_cache_seq_ops); 379 if (rc) 380 goto out_kfree; 381 seq = file->private_data; 382 seq->private = s; 383 memset(s, 0, sizeof(*s)); 384 out: 385 return rc; 386 out_kfree: 387 kfree(s); 388 goto out; 389 } 390 391 static struct file_operations rt_cache_seq_fops = { 392 .owner = THIS_MODULE, 393 .open = rt_cache_seq_open, 394 .read = seq_read, 395 .llseek = seq_lseek, 396 .release = seq_release_private, 397 }; 398 399 400 static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) 401 { 402 int cpu; 403 404 if (*pos == 0) 405 return SEQ_START_TOKEN; 406 407 for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { 408 if (!cpu_possible(cpu)) 409 continue; 410 *pos = cpu+1; 411 return &per_cpu(rt_cache_stat, cpu); 412 } 413 return NULL; 414 } 415 416 static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) 417 { 418 int cpu; 419 420 for (cpu = *pos; cpu < NR_CPUS; ++cpu) { 421 if (!cpu_possible(cpu)) 422 continue; 423 *pos = cpu+1; 424 return &per_cpu(rt_cache_stat, cpu); 425 } 426 return NULL; 427 428 } 429 430 static void rt_cpu_seq_stop(struct seq_file *seq, void *v) 431 { 432 433 } 434 435 static int rt_cpu_seq_show(struct seq_file *seq, void *v) 436 { 437 struct rt_cache_stat *st = v; 438 439 if (v == SEQ_START_TOKEN) { 440 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); 441 return 0; 442 } 443 444 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " 445 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", 446 atomic_read(&ipv4_dst_ops.entries), 447 st->in_hit, 448 st->in_slow_tot, 449 st->in_slow_mc, 450 st->in_no_route, 451 st->in_brd, 452 st->in_martian_dst, 453 st->in_martian_src, 454 455 st->out_hit, 456 st->out_slow_tot, 457 st->out_slow_mc, 458 459 st->gc_total, 460 st->gc_ignored, 461 st->gc_goal_miss, 462 st->gc_dst_overflow, 463 st->in_hlist_search, 464 st->out_hlist_search 465 ); 466 return 0; 467 } 468 469 static struct seq_operations rt_cpu_seq_ops = { 470 .start = rt_cpu_seq_start, 471 .next = rt_cpu_seq_next, 472 .stop = rt_cpu_seq_stop, 473 .show = rt_cpu_seq_show, 474 }; 475 476 477 static int rt_cpu_seq_open(struct inode *inode, struct file *file) 478 { 479 return seq_open(file, &rt_cpu_seq_ops); 480 } 481 482 static struct file_operations rt_cpu_seq_fops = { 483 .owner = THIS_MODULE, 484 .open = rt_cpu_seq_open, 485 .read = seq_read, 486 .llseek = seq_lseek, 487 .release = seq_release, 488 }; 489 490 #endif /* CONFIG_PROC_FS */ 491 492 static __inline__ void rt_free(struct rtable *rt) 493 { 494 multipath_remove(rt); 495 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 496 } 497 498 static __inline__ void rt_drop(struct rtable *rt) 499 { 500 multipath_remove(rt); 501 ip_rt_put(rt); 502 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 503 } 504 505 static __inline__ int rt_fast_clean(struct rtable *rth) 506 { 507 /* Kill broadcast/multicast entries very aggresively, if they 508 collide in hash table with more useful entries */ 509 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && 510 rth->fl.iif && rth->u.rt_next; 511 } 512 513 static __inline__ int rt_valuable(struct rtable *rth) 514 { 515 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 516 rth->u.dst.expires; 517 } 518 519 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) 520 { 521 unsigned long age; 522 int ret = 0; 523 524 if (atomic_read(&rth->u.dst.__refcnt)) 525 goto out; 526 527 ret = 1; 528 if (rth->u.dst.expires && 529 time_after_eq(jiffies, rth->u.dst.expires)) 530 goto out; 531 532 age = jiffies - rth->u.dst.lastuse; 533 ret = 0; 534 if ((age <= tmo1 && !rt_fast_clean(rth)) || 535 (age <= tmo2 && rt_valuable(rth))) 536 goto out; 537 ret = 1; 538 out: return ret; 539 } 540 541 /* Bits of score are: 542 * 31: very valuable 543 * 30: not quite useless 544 * 29..0: usage counter 545 */ 546 static inline u32 rt_score(struct rtable *rt) 547 { 548 u32 score = jiffies - rt->u.dst.lastuse; 549 550 score = ~score & ~(3<<30); 551 552 if (rt_valuable(rt)) 553 score |= (1<<31); 554 555 if (!rt->fl.iif || 556 !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL))) 557 score |= (1<<30); 558 559 return score; 560 } 561 562 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 563 { 564 return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 && 565 fl1->oif == fl2->oif && 566 fl1->iif == fl2->iif; 567 } 568 569 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 570 static struct rtable **rt_remove_balanced_route(struct rtable **chain_head, 571 struct rtable *expentry, 572 int *removed_count) 573 { 574 int passedexpired = 0; 575 struct rtable **nextstep = NULL; 576 struct rtable **rthp = chain_head; 577 struct rtable *rth; 578 579 if (removed_count) 580 *removed_count = 0; 581 582 while ((rth = *rthp) != NULL) { 583 if (rth == expentry) 584 passedexpired = 1; 585 586 if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 && 587 compare_keys(&(*rthp)->fl, &expentry->fl)) { 588 if (*rthp == expentry) { 589 *rthp = rth->u.rt_next; 590 continue; 591 } else { 592 *rthp = rth->u.rt_next; 593 rt_free(rth); 594 if (removed_count) 595 ++(*removed_count); 596 } 597 } else { 598 if (!((*rthp)->u.dst.flags & DST_BALANCED) && 599 passedexpired && !nextstep) 600 nextstep = &rth->u.rt_next; 601 602 rthp = &rth->u.rt_next; 603 } 604 } 605 606 rt_free(expentry); 607 if (removed_count) 608 ++(*removed_count); 609 610 return nextstep; 611 } 612 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 613 614 615 /* This runs via a timer and thus is always in BH context. */ 616 static void rt_check_expire(unsigned long dummy) 617 { 618 static unsigned int rover; 619 unsigned int i = rover, goal; 620 struct rtable *rth, **rthp; 621 unsigned long now = jiffies; 622 u64 mult; 623 624 mult = ((u64)ip_rt_gc_interval) << rt_hash_log; 625 if (ip_rt_gc_timeout > 1) 626 do_div(mult, ip_rt_gc_timeout); 627 goal = (unsigned int)mult; 628 if (goal > rt_hash_mask) goal = rt_hash_mask + 1; 629 for (; goal > 0; goal--) { 630 unsigned long tmo = ip_rt_gc_timeout; 631 632 i = (i + 1) & rt_hash_mask; 633 rthp = &rt_hash_table[i].chain; 634 635 if (*rthp == 0) 636 continue; 637 spin_lock(rt_hash_lock_addr(i)); 638 while ((rth = *rthp) != NULL) { 639 if (rth->u.dst.expires) { 640 /* Entry is expired even if it is in use */ 641 if (time_before_eq(now, rth->u.dst.expires)) { 642 tmo >>= 1; 643 rthp = &rth->u.rt_next; 644 continue; 645 } 646 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { 647 tmo >>= 1; 648 rthp = &rth->u.rt_next; 649 continue; 650 } 651 652 /* Cleanup aged off entries. */ 653 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 654 /* remove all related balanced entries if necessary */ 655 if (rth->u.dst.flags & DST_BALANCED) { 656 rthp = rt_remove_balanced_route( 657 &rt_hash_table[i].chain, 658 rth, NULL); 659 if (!rthp) 660 break; 661 } else { 662 *rthp = rth->u.rt_next; 663 rt_free(rth); 664 } 665 #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 666 *rthp = rth->u.rt_next; 667 rt_free(rth); 668 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 669 } 670 spin_unlock(rt_hash_lock_addr(i)); 671 672 /* Fallback loop breaker. */ 673 if (time_after(jiffies, now)) 674 break; 675 } 676 rover = i; 677 mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval); 678 } 679 680 /* This can run from both BH and non-BH contexts, the latter 681 * in the case of a forced flush event. 682 */ 683 static void rt_run_flush(unsigned long dummy) 684 { 685 int i; 686 struct rtable *rth, *next; 687 688 rt_deadline = 0; 689 690 get_random_bytes(&rt_hash_rnd, 4); 691 692 for (i = rt_hash_mask; i >= 0; i--) { 693 spin_lock_bh(rt_hash_lock_addr(i)); 694 rth = rt_hash_table[i].chain; 695 if (rth) 696 rt_hash_table[i].chain = NULL; 697 spin_unlock_bh(rt_hash_lock_addr(i)); 698 699 for (; rth; rth = next) { 700 next = rth->u.rt_next; 701 rt_free(rth); 702 } 703 } 704 } 705 706 static DEFINE_SPINLOCK(rt_flush_lock); 707 708 void rt_cache_flush(int delay) 709 { 710 unsigned long now = jiffies; 711 int user_mode = !in_softirq(); 712 713 if (delay < 0) 714 delay = ip_rt_min_delay; 715 716 /* flush existing multipath state*/ 717 multipath_flush(); 718 719 spin_lock_bh(&rt_flush_lock); 720 721 if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { 722 long tmo = (long)(rt_deadline - now); 723 724 /* If flush timer is already running 725 and flush request is not immediate (delay > 0): 726 727 if deadline is not achieved, prolongate timer to "delay", 728 otherwise fire it at deadline time. 729 */ 730 731 if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay) 732 tmo = 0; 733 734 if (delay > tmo) 735 delay = tmo; 736 } 737 738 if (delay <= 0) { 739 spin_unlock_bh(&rt_flush_lock); 740 rt_run_flush(0); 741 return; 742 } 743 744 if (rt_deadline == 0) 745 rt_deadline = now + ip_rt_max_delay; 746 747 mod_timer(&rt_flush_timer, now+delay); 748 spin_unlock_bh(&rt_flush_lock); 749 } 750 751 static void rt_secret_rebuild(unsigned long dummy) 752 { 753 unsigned long now = jiffies; 754 755 rt_cache_flush(0); 756 mod_timer(&rt_secret_timer, now + ip_rt_secret_interval); 757 } 758 759 /* 760 Short description of GC goals. 761 762 We want to build algorithm, which will keep routing cache 763 at some equilibrium point, when number of aged off entries 764 is kept approximately equal to newly generated ones. 765 766 Current expiration strength is variable "expire". 767 We try to adjust it dynamically, so that if networking 768 is idle expires is large enough to keep enough of warm entries, 769 and when load increases it reduces to limit cache size. 770 */ 771 772 static int rt_garbage_collect(void) 773 { 774 static unsigned long expire = RT_GC_TIMEOUT; 775 static unsigned long last_gc; 776 static int rover; 777 static int equilibrium; 778 struct rtable *rth, **rthp; 779 unsigned long now = jiffies; 780 int goal; 781 782 /* 783 * Garbage collection is pretty expensive, 784 * do not make it too frequently. 785 */ 786 787 RT_CACHE_STAT_INC(gc_total); 788 789 if (now - last_gc < ip_rt_gc_min_interval && 790 atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) { 791 RT_CACHE_STAT_INC(gc_ignored); 792 goto out; 793 } 794 795 /* Calculate number of entries, which we want to expire now. */ 796 goal = atomic_read(&ipv4_dst_ops.entries) - 797 (ip_rt_gc_elasticity << rt_hash_log); 798 if (goal <= 0) { 799 if (equilibrium < ipv4_dst_ops.gc_thresh) 800 equilibrium = ipv4_dst_ops.gc_thresh; 801 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; 802 if (goal > 0) { 803 equilibrium += min_t(unsigned int, goal / 2, rt_hash_mask + 1); 804 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; 805 } 806 } else { 807 /* We are in dangerous area. Try to reduce cache really 808 * aggressively. 809 */ 810 goal = max_t(unsigned int, goal / 2, rt_hash_mask + 1); 811 equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal; 812 } 813 814 if (now - last_gc >= ip_rt_gc_min_interval) 815 last_gc = now; 816 817 if (goal <= 0) { 818 equilibrium += goal; 819 goto work_done; 820 } 821 822 do { 823 int i, k; 824 825 for (i = rt_hash_mask, k = rover; i >= 0; i--) { 826 unsigned long tmo = expire; 827 828 k = (k + 1) & rt_hash_mask; 829 rthp = &rt_hash_table[k].chain; 830 spin_lock_bh(rt_hash_lock_addr(k)); 831 while ((rth = *rthp) != NULL) { 832 if (!rt_may_expire(rth, tmo, expire)) { 833 tmo >>= 1; 834 rthp = &rth->u.rt_next; 835 continue; 836 } 837 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 838 /* remove all related balanced entries 839 * if necessary 840 */ 841 if (rth->u.dst.flags & DST_BALANCED) { 842 int r; 843 844 rthp = rt_remove_balanced_route( 845 &rt_hash_table[k].chain, 846 rth, 847 &r); 848 goal -= r; 849 if (!rthp) 850 break; 851 } else { 852 *rthp = rth->u.rt_next; 853 rt_free(rth); 854 goal--; 855 } 856 #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 857 *rthp = rth->u.rt_next; 858 rt_free(rth); 859 goal--; 860 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 861 } 862 spin_unlock_bh(rt_hash_lock_addr(k)); 863 if (goal <= 0) 864 break; 865 } 866 rover = k; 867 868 if (goal <= 0) 869 goto work_done; 870 871 /* Goal is not achieved. We stop process if: 872 873 - if expire reduced to zero. Otherwise, expire is halfed. 874 - if table is not full. 875 - if we are called from interrupt. 876 - jiffies check is just fallback/debug loop breaker. 877 We will not spin here for long time in any case. 878 */ 879 880 RT_CACHE_STAT_INC(gc_goal_miss); 881 882 if (expire == 0) 883 break; 884 885 expire >>= 1; 886 #if RT_CACHE_DEBUG >= 2 887 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire, 888 atomic_read(&ipv4_dst_ops.entries), goal, i); 889 #endif 890 891 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) 892 goto out; 893 } while (!in_softirq() && time_before_eq(jiffies, now)); 894 895 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) 896 goto out; 897 if (net_ratelimit()) 898 printk(KERN_WARNING "dst cache overflow\n"); 899 RT_CACHE_STAT_INC(gc_dst_overflow); 900 return 1; 901 902 work_done: 903 expire += ip_rt_gc_min_interval; 904 if (expire > ip_rt_gc_timeout || 905 atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh) 906 expire = ip_rt_gc_timeout; 907 #if RT_CACHE_DEBUG >= 2 908 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire, 909 atomic_read(&ipv4_dst_ops.entries), goal, rover); 910 #endif 911 out: return 0; 912 } 913 914 static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) 915 { 916 struct rtable *rth, **rthp; 917 unsigned long now; 918 struct rtable *cand, **candp; 919 u32 min_score; 920 int chain_length; 921 int attempts = !in_softirq(); 922 923 restart: 924 chain_length = 0; 925 min_score = ~(u32)0; 926 cand = NULL; 927 candp = NULL; 928 now = jiffies; 929 930 rthp = &rt_hash_table[hash].chain; 931 932 spin_lock_bh(rt_hash_lock_addr(hash)); 933 while ((rth = *rthp) != NULL) { 934 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 935 if (!(rth->u.dst.flags & DST_BALANCED) && 936 compare_keys(&rth->fl, &rt->fl)) { 937 #else 938 if (compare_keys(&rth->fl, &rt->fl)) { 939 #endif 940 /* Put it first */ 941 *rthp = rth->u.rt_next; 942 /* 943 * Since lookup is lockfree, the deletion 944 * must be visible to another weakly ordered CPU before 945 * the insertion at the start of the hash chain. 946 */ 947 rcu_assign_pointer(rth->u.rt_next, 948 rt_hash_table[hash].chain); 949 /* 950 * Since lookup is lockfree, the update writes 951 * must be ordered for consistency on SMP. 952 */ 953 rcu_assign_pointer(rt_hash_table[hash].chain, rth); 954 955 rth->u.dst.__use++; 956 dst_hold(&rth->u.dst); 957 rth->u.dst.lastuse = now; 958 spin_unlock_bh(rt_hash_lock_addr(hash)); 959 960 rt_drop(rt); 961 *rp = rth; 962 return 0; 963 } 964 965 if (!atomic_read(&rth->u.dst.__refcnt)) { 966 u32 score = rt_score(rth); 967 968 if (score <= min_score) { 969 cand = rth; 970 candp = rthp; 971 min_score = score; 972 } 973 } 974 975 chain_length++; 976 977 rthp = &rth->u.rt_next; 978 } 979 980 if (cand) { 981 /* ip_rt_gc_elasticity used to be average length of chain 982 * length, when exceeded gc becomes really aggressive. 983 * 984 * The second limit is less certain. At the moment it allows 985 * only 2 entries per bucket. We will see. 986 */ 987 if (chain_length > ip_rt_gc_elasticity) { 988 *candp = cand->u.rt_next; 989 rt_free(cand); 990 } 991 } 992 993 /* Try to bind route to arp only if it is output 994 route or unicast forwarding path. 995 */ 996 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 997 int err = arp_bind_neighbour(&rt->u.dst); 998 if (err) { 999 spin_unlock_bh(rt_hash_lock_addr(hash)); 1000 1001 if (err != -ENOBUFS) { 1002 rt_drop(rt); 1003 return err; 1004 } 1005 1006 /* Neighbour tables are full and nothing 1007 can be released. Try to shrink route cache, 1008 it is most likely it holds some neighbour records. 1009 */ 1010 if (attempts-- > 0) { 1011 int saved_elasticity = ip_rt_gc_elasticity; 1012 int saved_int = ip_rt_gc_min_interval; 1013 ip_rt_gc_elasticity = 1; 1014 ip_rt_gc_min_interval = 0; 1015 rt_garbage_collect(); 1016 ip_rt_gc_min_interval = saved_int; 1017 ip_rt_gc_elasticity = saved_elasticity; 1018 goto restart; 1019 } 1020 1021 if (net_ratelimit()) 1022 printk(KERN_WARNING "Neighbour table overflow.\n"); 1023 rt_drop(rt); 1024 return -ENOBUFS; 1025 } 1026 } 1027 1028 rt->u.rt_next = rt_hash_table[hash].chain; 1029 #if RT_CACHE_DEBUG >= 2 1030 if (rt->u.rt_next) { 1031 struct rtable *trt; 1032 printk(KERN_DEBUG "rt_cache @%02x: %u.%u.%u.%u", hash, 1033 NIPQUAD(rt->rt_dst)); 1034 for (trt = rt->u.rt_next; trt; trt = trt->u.rt_next) 1035 printk(" . %u.%u.%u.%u", NIPQUAD(trt->rt_dst)); 1036 printk("\n"); 1037 } 1038 #endif 1039 rt_hash_table[hash].chain = rt; 1040 spin_unlock_bh(rt_hash_lock_addr(hash)); 1041 *rp = rt; 1042 return 0; 1043 } 1044 1045 void rt_bind_peer(struct rtable *rt, int create) 1046 { 1047 static DEFINE_SPINLOCK(rt_peer_lock); 1048 struct inet_peer *peer; 1049 1050 peer = inet_getpeer(rt->rt_dst, create); 1051 1052 spin_lock_bh(&rt_peer_lock); 1053 if (rt->peer == NULL) { 1054 rt->peer = peer; 1055 peer = NULL; 1056 } 1057 spin_unlock_bh(&rt_peer_lock); 1058 if (peer) 1059 inet_putpeer(peer); 1060 } 1061 1062 /* 1063 * Peer allocation may fail only in serious out-of-memory conditions. However 1064 * we still can generate some output. 1065 * Random ID selection looks a bit dangerous because we have no chances to 1066 * select ID being unique in a reasonable period of time. 1067 * But broken packet identifier may be better than no packet at all. 1068 */ 1069 static void ip_select_fb_ident(struct iphdr *iph) 1070 { 1071 static DEFINE_SPINLOCK(ip_fb_id_lock); 1072 static u32 ip_fallback_id; 1073 u32 salt; 1074 1075 spin_lock_bh(&ip_fb_id_lock); 1076 salt = secure_ip_id(ip_fallback_id ^ iph->daddr); 1077 iph->id = htons(salt & 0xFFFF); 1078 ip_fallback_id = salt; 1079 spin_unlock_bh(&ip_fb_id_lock); 1080 } 1081 1082 void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) 1083 { 1084 struct rtable *rt = (struct rtable *) dst; 1085 1086 if (rt) { 1087 if (rt->peer == NULL) 1088 rt_bind_peer(rt, 1); 1089 1090 /* If peer is attached to destination, it is never detached, 1091 so that we need not to grab a lock to dereference it. 1092 */ 1093 if (rt->peer) { 1094 iph->id = htons(inet_getid(rt->peer, more)); 1095 return; 1096 } 1097 } else 1098 printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", 1099 __builtin_return_address(0)); 1100 1101 ip_select_fb_ident(iph); 1102 } 1103 1104 static void rt_del(unsigned hash, struct rtable *rt) 1105 { 1106 struct rtable **rthp; 1107 1108 spin_lock_bh(rt_hash_lock_addr(hash)); 1109 ip_rt_put(rt); 1110 for (rthp = &rt_hash_table[hash].chain; *rthp; 1111 rthp = &(*rthp)->u.rt_next) 1112 if (*rthp == rt) { 1113 *rthp = rt->u.rt_next; 1114 rt_free(rt); 1115 break; 1116 } 1117 spin_unlock_bh(rt_hash_lock_addr(hash)); 1118 } 1119 1120 void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, 1121 u32 saddr, struct net_device *dev) 1122 { 1123 int i, k; 1124 struct in_device *in_dev = in_dev_get(dev); 1125 struct rtable *rth, **rthp; 1126 u32 skeys[2] = { saddr, 0 }; 1127 int ikeys[2] = { dev->ifindex, 0 }; 1128 1129 if (!in_dev) 1130 return; 1131 1132 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) 1133 || MULTICAST(new_gw) || BADCLASS(new_gw) || ZERONET(new_gw)) 1134 goto reject_redirect; 1135 1136 if (!IN_DEV_SHARED_MEDIA(in_dev)) { 1137 if (!inet_addr_onlink(in_dev, new_gw, old_gw)) 1138 goto reject_redirect; 1139 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) 1140 goto reject_redirect; 1141 } else { 1142 if (inet_addr_type(new_gw) != RTN_UNICAST) 1143 goto reject_redirect; 1144 } 1145 1146 for (i = 0; i < 2; i++) { 1147 for (k = 0; k < 2; k++) { 1148 unsigned hash = rt_hash_code(daddr, 1149 skeys[i] ^ (ikeys[k] << 5)); 1150 1151 rthp=&rt_hash_table[hash].chain; 1152 1153 rcu_read_lock(); 1154 while ((rth = rcu_dereference(*rthp)) != NULL) { 1155 struct rtable *rt; 1156 1157 if (rth->fl.fl4_dst != daddr || 1158 rth->fl.fl4_src != skeys[i] || 1159 rth->fl.oif != ikeys[k] || 1160 rth->fl.iif != 0) { 1161 rthp = &rth->u.rt_next; 1162 continue; 1163 } 1164 1165 if (rth->rt_dst != daddr || 1166 rth->rt_src != saddr || 1167 rth->u.dst.error || 1168 rth->rt_gateway != old_gw || 1169 rth->u.dst.dev != dev) 1170 break; 1171 1172 dst_hold(&rth->u.dst); 1173 rcu_read_unlock(); 1174 1175 rt = dst_alloc(&ipv4_dst_ops); 1176 if (rt == NULL) { 1177 ip_rt_put(rth); 1178 in_dev_put(in_dev); 1179 return; 1180 } 1181 1182 /* Copy all the information. */ 1183 *rt = *rth; 1184 INIT_RCU_HEAD(&rt->u.dst.rcu_head); 1185 rt->u.dst.__use = 1; 1186 atomic_set(&rt->u.dst.__refcnt, 1); 1187 rt->u.dst.child = NULL; 1188 if (rt->u.dst.dev) 1189 dev_hold(rt->u.dst.dev); 1190 if (rt->idev) 1191 in_dev_hold(rt->idev); 1192 rt->u.dst.obsolete = 0; 1193 rt->u.dst.lastuse = jiffies; 1194 rt->u.dst.path = &rt->u.dst; 1195 rt->u.dst.neighbour = NULL; 1196 rt->u.dst.hh = NULL; 1197 rt->u.dst.xfrm = NULL; 1198 1199 rt->rt_flags |= RTCF_REDIRECTED; 1200 1201 /* Gateway is different ... */ 1202 rt->rt_gateway = new_gw; 1203 1204 /* Redirect received -> path was valid */ 1205 dst_confirm(&rth->u.dst); 1206 1207 if (rt->peer) 1208 atomic_inc(&rt->peer->refcnt); 1209 1210 if (arp_bind_neighbour(&rt->u.dst) || 1211 !(rt->u.dst.neighbour->nud_state & 1212 NUD_VALID)) { 1213 if (rt->u.dst.neighbour) 1214 neigh_event_send(rt->u.dst.neighbour, NULL); 1215 ip_rt_put(rth); 1216 rt_drop(rt); 1217 goto do_next; 1218 } 1219 1220 rt_del(hash, rth); 1221 if (!rt_intern_hash(hash, rt, &rt)) 1222 ip_rt_put(rt); 1223 goto do_next; 1224 } 1225 rcu_read_unlock(); 1226 do_next: 1227 ; 1228 } 1229 } 1230 in_dev_put(in_dev); 1231 return; 1232 1233 reject_redirect: 1234 #ifdef CONFIG_IP_ROUTE_VERBOSE 1235 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 1236 printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about " 1237 "%u.%u.%u.%u ignored.\n" 1238 " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u\n", 1239 NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), 1240 NIPQUAD(saddr), NIPQUAD(daddr)); 1241 #endif 1242 in_dev_put(in_dev); 1243 } 1244 1245 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 1246 { 1247 struct rtable *rt = (struct rtable*)dst; 1248 struct dst_entry *ret = dst; 1249 1250 if (rt) { 1251 if (dst->obsolete) { 1252 ip_rt_put(rt); 1253 ret = NULL; 1254 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1255 rt->u.dst.expires) { 1256 unsigned hash = rt_hash_code(rt->fl.fl4_dst, 1257 rt->fl.fl4_src ^ 1258 (rt->fl.oif << 5)); 1259 #if RT_CACHE_DEBUG >= 1 1260 printk(KERN_DEBUG "ip_rt_advice: redirect to " 1261 "%u.%u.%u.%u/%02x dropped\n", 1262 NIPQUAD(rt->rt_dst), rt->fl.fl4_tos); 1263 #endif 1264 rt_del(hash, rt); 1265 ret = NULL; 1266 } 1267 } 1268 return ret; 1269 } 1270 1271 /* 1272 * Algorithm: 1273 * 1. The first ip_rt_redirect_number redirects are sent 1274 * with exponential backoff, then we stop sending them at all, 1275 * assuming that the host ignores our redirects. 1276 * 2. If we did not see packets requiring redirects 1277 * during ip_rt_redirect_silence, we assume that the host 1278 * forgot redirected route and start to send redirects again. 1279 * 1280 * This algorithm is much cheaper and more intelligent than dumb load limiting 1281 * in icmp.c. 1282 * 1283 * NOTE. Do not forget to inhibit load limiting for redirects (redundant) 1284 * and "frag. need" (breaks PMTU discovery) in icmp.c. 1285 */ 1286 1287 void ip_rt_send_redirect(struct sk_buff *skb) 1288 { 1289 struct rtable *rt = (struct rtable*)skb->dst; 1290 struct in_device *in_dev = in_dev_get(rt->u.dst.dev); 1291 1292 if (!in_dev) 1293 return; 1294 1295 if (!IN_DEV_TX_REDIRECTS(in_dev)) 1296 goto out; 1297 1298 /* No redirected packets during ip_rt_redirect_silence; 1299 * reset the algorithm. 1300 */ 1301 if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) 1302 rt->u.dst.rate_tokens = 0; 1303 1304 /* Too many ignored redirects; do not send anything 1305 * set u.dst.rate_last to the last seen redirected packet. 1306 */ 1307 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { 1308 rt->u.dst.rate_last = jiffies; 1309 goto out; 1310 } 1311 1312 /* Check for load limit; set rate_last to the latest sent 1313 * redirect. 1314 */ 1315 if (time_after(jiffies, 1316 (rt->u.dst.rate_last + 1317 (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { 1318 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1319 rt->u.dst.rate_last = jiffies; 1320 ++rt->u.dst.rate_tokens; 1321 #ifdef CONFIG_IP_ROUTE_VERBOSE 1322 if (IN_DEV_LOG_MARTIANS(in_dev) && 1323 rt->u.dst.rate_tokens == ip_rt_redirect_number && 1324 net_ratelimit()) 1325 printk(KERN_WARNING "host %u.%u.%u.%u/if%d ignores " 1326 "redirects for %u.%u.%u.%u to %u.%u.%u.%u.\n", 1327 NIPQUAD(rt->rt_src), rt->rt_iif, 1328 NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway)); 1329 #endif 1330 } 1331 out: 1332 in_dev_put(in_dev); 1333 } 1334 1335 static int ip_error(struct sk_buff *skb) 1336 { 1337 struct rtable *rt = (struct rtable*)skb->dst; 1338 unsigned long now; 1339 int code; 1340 1341 switch (rt->u.dst.error) { 1342 case EINVAL: 1343 default: 1344 goto out; 1345 case EHOSTUNREACH: 1346 code = ICMP_HOST_UNREACH; 1347 break; 1348 case ENETUNREACH: 1349 code = ICMP_NET_UNREACH; 1350 break; 1351 case EACCES: 1352 code = ICMP_PKT_FILTERED; 1353 break; 1354 } 1355 1356 now = jiffies; 1357 rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; 1358 if (rt->u.dst.rate_tokens > ip_rt_error_burst) 1359 rt->u.dst.rate_tokens = ip_rt_error_burst; 1360 rt->u.dst.rate_last = now; 1361 if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { 1362 rt->u.dst.rate_tokens -= ip_rt_error_cost; 1363 icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 1364 } 1365 1366 out: kfree_skb(skb); 1367 return 0; 1368 } 1369 1370 /* 1371 * The last two values are not from the RFC but 1372 * are needed for AMPRnet AX.25 paths. 1373 */ 1374 1375 static const unsigned short mtu_plateau[] = 1376 {32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; 1377 1378 static __inline__ unsigned short guess_mtu(unsigned short old_mtu) 1379 { 1380 int i; 1381 1382 for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++) 1383 if (old_mtu > mtu_plateau[i]) 1384 return mtu_plateau[i]; 1385 return 68; 1386 } 1387 1388 unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) 1389 { 1390 int i; 1391 unsigned short old_mtu = ntohs(iph->tot_len); 1392 struct rtable *rth; 1393 u32 skeys[2] = { iph->saddr, 0, }; 1394 u32 daddr = iph->daddr; 1395 unsigned short est_mtu = 0; 1396 1397 if (ipv4_config.no_pmtu_disc) 1398 return 0; 1399 1400 for (i = 0; i < 2; i++) { 1401 unsigned hash = rt_hash_code(daddr, skeys[i]); 1402 1403 rcu_read_lock(); 1404 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 1405 rth = rcu_dereference(rth->u.rt_next)) { 1406 if (rth->fl.fl4_dst == daddr && 1407 rth->fl.fl4_src == skeys[i] && 1408 rth->rt_dst == daddr && 1409 rth->rt_src == iph->saddr && 1410 rth->fl.iif == 0 && 1411 !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) { 1412 unsigned short mtu = new_mtu; 1413 1414 if (new_mtu < 68 || new_mtu >= old_mtu) { 1415 1416 /* BSD 4.2 compatibility hack :-( */ 1417 if (mtu == 0 && 1418 old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] && 1419 old_mtu >= 68 + (iph->ihl << 2)) 1420 old_mtu -= iph->ihl << 2; 1421 1422 mtu = guess_mtu(old_mtu); 1423 } 1424 if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) { 1425 if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) { 1426 dst_confirm(&rth->u.dst); 1427 if (mtu < ip_rt_min_pmtu) { 1428 mtu = ip_rt_min_pmtu; 1429 rth->u.dst.metrics[RTAX_LOCK-1] |= 1430 (1 << RTAX_MTU); 1431 } 1432 rth->u.dst.metrics[RTAX_MTU-1] = mtu; 1433 dst_set_expires(&rth->u.dst, 1434 ip_rt_mtu_expires); 1435 } 1436 est_mtu = mtu; 1437 } 1438 } 1439 } 1440 rcu_read_unlock(); 1441 } 1442 return est_mtu ? : new_mtu; 1443 } 1444 1445 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1446 { 1447 if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 && 1448 !(dst_metric_locked(dst, RTAX_MTU))) { 1449 if (mtu < ip_rt_min_pmtu) { 1450 mtu = ip_rt_min_pmtu; 1451 dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU); 1452 } 1453 dst->metrics[RTAX_MTU-1] = mtu; 1454 dst_set_expires(dst, ip_rt_mtu_expires); 1455 } 1456 } 1457 1458 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1459 { 1460 return NULL; 1461 } 1462 1463 static void ipv4_dst_destroy(struct dst_entry *dst) 1464 { 1465 struct rtable *rt = (struct rtable *) dst; 1466 struct inet_peer *peer = rt->peer; 1467 struct in_device *idev = rt->idev; 1468 1469 if (peer) { 1470 rt->peer = NULL; 1471 inet_putpeer(peer); 1472 } 1473 1474 if (idev) { 1475 rt->idev = NULL; 1476 in_dev_put(idev); 1477 } 1478 } 1479 1480 static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 1481 int how) 1482 { 1483 struct rtable *rt = (struct rtable *) dst; 1484 struct in_device *idev = rt->idev; 1485 if (dev != &loopback_dev && idev && idev->dev == dev) { 1486 struct in_device *loopback_idev = in_dev_get(&loopback_dev); 1487 if (loopback_idev) { 1488 rt->idev = loopback_idev; 1489 in_dev_put(idev); 1490 } 1491 } 1492 } 1493 1494 static void ipv4_link_failure(struct sk_buff *skb) 1495 { 1496 struct rtable *rt; 1497 1498 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 1499 1500 rt = (struct rtable *) skb->dst; 1501 if (rt) 1502 dst_set_expires(&rt->u.dst, 0); 1503 } 1504 1505 static int ip_rt_bug(struct sk_buff *skb) 1506 { 1507 printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n", 1508 NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr), 1509 skb->dev ? skb->dev->name : "?"); 1510 kfree_skb(skb); 1511 return 0; 1512 } 1513 1514 /* 1515 We do not cache source address of outgoing interface, 1516 because it is used only by IP RR, TS and SRR options, 1517 so that it out of fast path. 1518 1519 BTW remember: "addr" is allowed to be not aligned 1520 in IP options! 1521 */ 1522 1523 void ip_rt_get_source(u8 *addr, struct rtable *rt) 1524 { 1525 u32 src; 1526 struct fib_result res; 1527 1528 if (rt->fl.iif == 0) 1529 src = rt->rt_src; 1530 else if (fib_lookup(&rt->fl, &res) == 0) { 1531 src = FIB_RES_PREFSRC(res); 1532 fib_res_put(&res); 1533 } else 1534 src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, 1535 RT_SCOPE_UNIVERSE); 1536 memcpy(addr, &src, 4); 1537 } 1538 1539 #ifdef CONFIG_NET_CLS_ROUTE 1540 static void set_class_tag(struct rtable *rt, u32 tag) 1541 { 1542 if (!(rt->u.dst.tclassid & 0xFFFF)) 1543 rt->u.dst.tclassid |= tag & 0xFFFF; 1544 if (!(rt->u.dst.tclassid & 0xFFFF0000)) 1545 rt->u.dst.tclassid |= tag & 0xFFFF0000; 1546 } 1547 #endif 1548 1549 static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) 1550 { 1551 struct fib_info *fi = res->fi; 1552 1553 if (fi) { 1554 if (FIB_RES_GW(*res) && 1555 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1556 rt->rt_gateway = FIB_RES_GW(*res); 1557 memcpy(rt->u.dst.metrics, fi->fib_metrics, 1558 sizeof(rt->u.dst.metrics)); 1559 if (fi->fib_mtu == 0) { 1560 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; 1561 if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) && 1562 rt->rt_gateway != rt->rt_dst && 1563 rt->u.dst.dev->mtu > 576) 1564 rt->u.dst.metrics[RTAX_MTU-1] = 576; 1565 } 1566 #ifdef CONFIG_NET_CLS_ROUTE 1567 rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; 1568 #endif 1569 } else 1570 rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; 1571 1572 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) 1573 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; 1574 if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU) 1575 rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; 1576 if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0) 1577 rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, 1578 ip_rt_min_advmss); 1579 if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40) 1580 rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; 1581 1582 #ifdef CONFIG_NET_CLS_ROUTE 1583 #ifdef CONFIG_IP_MULTIPLE_TABLES 1584 set_class_tag(rt, fib_rules_tclass(res)); 1585 #endif 1586 set_class_tag(rt, itag); 1587 #endif 1588 rt->rt_type = res->type; 1589 } 1590 1591 static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, 1592 u8 tos, struct net_device *dev, int our) 1593 { 1594 unsigned hash; 1595 struct rtable *rth; 1596 u32 spec_dst; 1597 struct in_device *in_dev = in_dev_get(dev); 1598 u32 itag = 0; 1599 1600 /* Primary sanity checks. */ 1601 1602 if (in_dev == NULL) 1603 return -EINVAL; 1604 1605 if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr) || 1606 skb->protocol != htons(ETH_P_IP)) 1607 goto e_inval; 1608 1609 if (ZERONET(saddr)) { 1610 if (!LOCAL_MCAST(daddr)) 1611 goto e_inval; 1612 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1613 } else if (fib_validate_source(saddr, 0, tos, 0, 1614 dev, &spec_dst, &itag) < 0) 1615 goto e_inval; 1616 1617 rth = dst_alloc(&ipv4_dst_ops); 1618 if (!rth) 1619 goto e_nobufs; 1620 1621 rth->u.dst.output= ip_rt_bug; 1622 1623 atomic_set(&rth->u.dst.__refcnt, 1); 1624 rth->u.dst.flags= DST_HOST; 1625 if (in_dev->cnf.no_policy) 1626 rth->u.dst.flags |= DST_NOPOLICY; 1627 rth->fl.fl4_dst = daddr; 1628 rth->rt_dst = daddr; 1629 rth->fl.fl4_tos = tos; 1630 #ifdef CONFIG_IP_ROUTE_FWMARK 1631 rth->fl.fl4_fwmark= skb->nfmark; 1632 #endif 1633 rth->fl.fl4_src = saddr; 1634 rth->rt_src = saddr; 1635 #ifdef CONFIG_NET_CLS_ROUTE 1636 rth->u.dst.tclassid = itag; 1637 #endif 1638 rth->rt_iif = 1639 rth->fl.iif = dev->ifindex; 1640 rth->u.dst.dev = &loopback_dev; 1641 dev_hold(rth->u.dst.dev); 1642 rth->idev = in_dev_get(rth->u.dst.dev); 1643 rth->fl.oif = 0; 1644 rth->rt_gateway = daddr; 1645 rth->rt_spec_dst= spec_dst; 1646 rth->rt_type = RTN_MULTICAST; 1647 rth->rt_flags = RTCF_MULTICAST; 1648 if (our) { 1649 rth->u.dst.input= ip_local_deliver; 1650 rth->rt_flags |= RTCF_LOCAL; 1651 } 1652 1653 #ifdef CONFIG_IP_MROUTE 1654 if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev)) 1655 rth->u.dst.input = ip_mr_input; 1656 #endif 1657 RT_CACHE_STAT_INC(in_slow_mc); 1658 1659 in_dev_put(in_dev); 1660 hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5)); 1661 return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); 1662 1663 e_nobufs: 1664 in_dev_put(in_dev); 1665 return -ENOBUFS; 1666 1667 e_inval: 1668 in_dev_put(in_dev); 1669 return -EINVAL; 1670 } 1671 1672 1673 static void ip_handle_martian_source(struct net_device *dev, 1674 struct in_device *in_dev, 1675 struct sk_buff *skb, 1676 u32 daddr, 1677 u32 saddr) 1678 { 1679 RT_CACHE_STAT_INC(in_martian_src); 1680 #ifdef CONFIG_IP_ROUTE_VERBOSE 1681 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { 1682 /* 1683 * RFC1812 recommendation, if source is martian, 1684 * the only hint is MAC header. 1685 */ 1686 printk(KERN_WARNING "martian source %u.%u.%u.%u from " 1687 "%u.%u.%u.%u, on dev %s\n", 1688 NIPQUAD(daddr), NIPQUAD(saddr), dev->name); 1689 if (dev->hard_header_len && skb->mac.raw) { 1690 int i; 1691 unsigned char *p = skb->mac.raw; 1692 printk(KERN_WARNING "ll header: "); 1693 for (i = 0; i < dev->hard_header_len; i++, p++) { 1694 printk("%02x", *p); 1695 if (i < (dev->hard_header_len - 1)) 1696 printk(":"); 1697 } 1698 printk("\n"); 1699 } 1700 } 1701 #endif 1702 } 1703 1704 static inline int __mkroute_input(struct sk_buff *skb, 1705 struct fib_result* res, 1706 struct in_device *in_dev, 1707 u32 daddr, u32 saddr, u32 tos, 1708 struct rtable **result) 1709 { 1710 1711 struct rtable *rth; 1712 int err; 1713 struct in_device *out_dev; 1714 unsigned flags = 0; 1715 u32 spec_dst, itag; 1716 1717 /* get a working reference to the output device */ 1718 out_dev = in_dev_get(FIB_RES_DEV(*res)); 1719 if (out_dev == NULL) { 1720 if (net_ratelimit()) 1721 printk(KERN_CRIT "Bug in ip_route_input" \ 1722 "_slow(). Please, report\n"); 1723 return -EINVAL; 1724 } 1725 1726 1727 err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), 1728 in_dev->dev, &spec_dst, &itag); 1729 if (err < 0) { 1730 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 1731 saddr); 1732 1733 err = -EINVAL; 1734 goto cleanup; 1735 } 1736 1737 if (err) 1738 flags |= RTCF_DIRECTSRC; 1739 1740 if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) && 1741 (IN_DEV_SHARED_MEDIA(out_dev) || 1742 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) 1743 flags |= RTCF_DOREDIRECT; 1744 1745 if (skb->protocol != htons(ETH_P_IP)) { 1746 /* Not IP (i.e. ARP). Do not create route, if it is 1747 * invalid for proxy arp. DNAT routes are always valid. 1748 */ 1749 if (out_dev == in_dev && !(flags & RTCF_DNAT)) { 1750 err = -EINVAL; 1751 goto cleanup; 1752 } 1753 } 1754 1755 1756 rth = dst_alloc(&ipv4_dst_ops); 1757 if (!rth) { 1758 err = -ENOBUFS; 1759 goto cleanup; 1760 } 1761 1762 atomic_set(&rth->u.dst.__refcnt, 1); 1763 rth->u.dst.flags= DST_HOST; 1764 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 1765 if (res->fi->fib_nhs > 1) 1766 rth->u.dst.flags |= DST_BALANCED; 1767 #endif 1768 if (in_dev->cnf.no_policy) 1769 rth->u.dst.flags |= DST_NOPOLICY; 1770 if (in_dev->cnf.no_xfrm) 1771 rth->u.dst.flags |= DST_NOXFRM; 1772 rth->fl.fl4_dst = daddr; 1773 rth->rt_dst = daddr; 1774 rth->fl.fl4_tos = tos; 1775 #ifdef CONFIG_IP_ROUTE_FWMARK 1776 rth->fl.fl4_fwmark= skb->nfmark; 1777 #endif 1778 rth->fl.fl4_src = saddr; 1779 rth->rt_src = saddr; 1780 rth->rt_gateway = daddr; 1781 rth->rt_iif = 1782 rth->fl.iif = in_dev->dev->ifindex; 1783 rth->u.dst.dev = (out_dev)->dev; 1784 dev_hold(rth->u.dst.dev); 1785 rth->idev = in_dev_get(rth->u.dst.dev); 1786 rth->fl.oif = 0; 1787 rth->rt_spec_dst= spec_dst; 1788 1789 rth->u.dst.input = ip_forward; 1790 rth->u.dst.output = ip_output; 1791 1792 rt_set_nexthop(rth, res, itag); 1793 1794 rth->rt_flags = flags; 1795 1796 *result = rth; 1797 err = 0; 1798 cleanup: 1799 /* release the working reference to the output device */ 1800 in_dev_put(out_dev); 1801 return err; 1802 } 1803 1804 static inline int ip_mkroute_input_def(struct sk_buff *skb, 1805 struct fib_result* res, 1806 const struct flowi *fl, 1807 struct in_device *in_dev, 1808 u32 daddr, u32 saddr, u32 tos) 1809 { 1810 struct rtable* rth = NULL; 1811 int err; 1812 unsigned hash; 1813 1814 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1815 if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) 1816 fib_select_multipath(fl, res); 1817 #endif 1818 1819 /* create a routing cache entry */ 1820 err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); 1821 if (err) 1822 return err; 1823 1824 /* put it into the cache */ 1825 hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); 1826 return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 1827 } 1828 1829 static inline int ip_mkroute_input(struct sk_buff *skb, 1830 struct fib_result* res, 1831 const struct flowi *fl, 1832 struct in_device *in_dev, 1833 u32 daddr, u32 saddr, u32 tos) 1834 { 1835 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 1836 struct rtable* rth = NULL, *rtres; 1837 unsigned char hop, hopcount; 1838 int err = -EINVAL; 1839 unsigned int hash; 1840 1841 if (res->fi) 1842 hopcount = res->fi->fib_nhs; 1843 else 1844 hopcount = 1; 1845 1846 /* distinguish between multipath and singlepath */ 1847 if (hopcount < 2) 1848 return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, 1849 saddr, tos); 1850 1851 /* add all alternatives to the routing cache */ 1852 for (hop = 0; hop < hopcount; hop++) { 1853 res->nh_sel = hop; 1854 1855 /* put reference to previous result */ 1856 if (hop) 1857 ip_rt_put(rtres); 1858 1859 /* create a routing cache entry */ 1860 err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, 1861 &rth); 1862 if (err) 1863 return err; 1864 1865 /* put it into the cache */ 1866 hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); 1867 err = rt_intern_hash(hash, rth, &rtres); 1868 if (err) 1869 return err; 1870 1871 /* forward hop information to multipath impl. */ 1872 multipath_set_nhinfo(rth, 1873 FIB_RES_NETWORK(*res), 1874 FIB_RES_NETMASK(*res), 1875 res->prefixlen, 1876 &FIB_RES_NH(*res)); 1877 } 1878 skb->dst = &rtres->u.dst; 1879 return err; 1880 #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 1881 return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); 1882 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 1883 } 1884 1885 1886 /* 1887 * NOTE. We drop all the packets that has local source 1888 * addresses, because every properly looped back packet 1889 * must have correct destination already attached by output routine. 1890 * 1891 * Such approach solves two big problems: 1892 * 1. Not simplex devices are handled properly. 1893 * 2. IP spoofing attempts are filtered with 100% of guarantee. 1894 */ 1895 1896 static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, 1897 u8 tos, struct net_device *dev) 1898 { 1899 struct fib_result res; 1900 struct in_device *in_dev = in_dev_get(dev); 1901 struct flowi fl = { .nl_u = { .ip4_u = 1902 { .daddr = daddr, 1903 .saddr = saddr, 1904 .tos = tos, 1905 .scope = RT_SCOPE_UNIVERSE, 1906 #ifdef CONFIG_IP_ROUTE_FWMARK 1907 .fwmark = skb->nfmark 1908 #endif 1909 } }, 1910 .iif = dev->ifindex }; 1911 unsigned flags = 0; 1912 u32 itag = 0; 1913 struct rtable * rth; 1914 unsigned hash; 1915 u32 spec_dst; 1916 int err = -EINVAL; 1917 int free_res = 0; 1918 1919 /* IP on this device is disabled. */ 1920 1921 if (!in_dev) 1922 goto out; 1923 1924 /* Check for the most weird martians, which can be not detected 1925 by fib_lookup. 1926 */ 1927 1928 if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr)) 1929 goto martian_source; 1930 1931 if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0)) 1932 goto brd_input; 1933 1934 /* Accept zero addresses only to limited broadcast; 1935 * I even do not know to fix it or not. Waiting for complains :-) 1936 */ 1937 if (ZERONET(saddr)) 1938 goto martian_source; 1939 1940 if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr)) 1941 goto martian_destination; 1942 1943 /* 1944 * Now we are ready to route packet. 1945 */ 1946 if ((err = fib_lookup(&fl, &res)) != 0) { 1947 if (!IN_DEV_FORWARD(in_dev)) 1948 goto e_hostunreach; 1949 goto no_route; 1950 } 1951 free_res = 1; 1952 1953 RT_CACHE_STAT_INC(in_slow_tot); 1954 1955 if (res.type == RTN_BROADCAST) 1956 goto brd_input; 1957 1958 if (res.type == RTN_LOCAL) { 1959 int result; 1960 result = fib_validate_source(saddr, daddr, tos, 1961 loopback_dev.ifindex, 1962 dev, &spec_dst, &itag); 1963 if (result < 0) 1964 goto martian_source; 1965 if (result) 1966 flags |= RTCF_DIRECTSRC; 1967 spec_dst = daddr; 1968 goto local_input; 1969 } 1970 1971 if (!IN_DEV_FORWARD(in_dev)) 1972 goto e_hostunreach; 1973 if (res.type != RTN_UNICAST) 1974 goto martian_destination; 1975 1976 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 1977 if (err == -ENOBUFS) 1978 goto e_nobufs; 1979 if (err == -EINVAL) 1980 goto e_inval; 1981 1982 done: 1983 in_dev_put(in_dev); 1984 if (free_res) 1985 fib_res_put(&res); 1986 out: return err; 1987 1988 brd_input: 1989 if (skb->protocol != htons(ETH_P_IP)) 1990 goto e_inval; 1991 1992 if (ZERONET(saddr)) 1993 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1994 else { 1995 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 1996 &itag); 1997 if (err < 0) 1998 goto martian_source; 1999 if (err) 2000 flags |= RTCF_DIRECTSRC; 2001 } 2002 flags |= RTCF_BROADCAST; 2003 res.type = RTN_BROADCAST; 2004 RT_CACHE_STAT_INC(in_brd); 2005 2006 local_input: 2007 rth = dst_alloc(&ipv4_dst_ops); 2008 if (!rth) 2009 goto e_nobufs; 2010 2011 rth->u.dst.output= ip_rt_bug; 2012 2013 atomic_set(&rth->u.dst.__refcnt, 1); 2014 rth->u.dst.flags= DST_HOST; 2015 if (in_dev->cnf.no_policy) 2016 rth->u.dst.flags |= DST_NOPOLICY; 2017 rth->fl.fl4_dst = daddr; 2018 rth->rt_dst = daddr; 2019 rth->fl.fl4_tos = tos; 2020 #ifdef CONFIG_IP_ROUTE_FWMARK 2021 rth->fl.fl4_fwmark= skb->nfmark; 2022 #endif 2023 rth->fl.fl4_src = saddr; 2024 rth->rt_src = saddr; 2025 #ifdef CONFIG_NET_CLS_ROUTE 2026 rth->u.dst.tclassid = itag; 2027 #endif 2028 rth->rt_iif = 2029 rth->fl.iif = dev->ifindex; 2030 rth->u.dst.dev = &loopback_dev; 2031 dev_hold(rth->u.dst.dev); 2032 rth->idev = in_dev_get(rth->u.dst.dev); 2033 rth->rt_gateway = daddr; 2034 rth->rt_spec_dst= spec_dst; 2035 rth->u.dst.input= ip_local_deliver; 2036 rth->rt_flags = flags|RTCF_LOCAL; 2037 if (res.type == RTN_UNREACHABLE) { 2038 rth->u.dst.input= ip_error; 2039 rth->u.dst.error= -err; 2040 rth->rt_flags &= ~RTCF_LOCAL; 2041 } 2042 rth->rt_type = res.type; 2043 hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5)); 2044 err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 2045 goto done; 2046 2047 no_route: 2048 RT_CACHE_STAT_INC(in_no_route); 2049 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 2050 res.type = RTN_UNREACHABLE; 2051 goto local_input; 2052 2053 /* 2054 * Do not cache martian addresses: they should be logged (RFC1812) 2055 */ 2056 martian_destination: 2057 RT_CACHE_STAT_INC(in_martian_dst); 2058 #ifdef CONFIG_IP_ROUTE_VERBOSE 2059 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 2060 printk(KERN_WARNING "martian destination %u.%u.%u.%u from " 2061 "%u.%u.%u.%u, dev %s\n", 2062 NIPQUAD(daddr), NIPQUAD(saddr), dev->name); 2063 #endif 2064 2065 e_hostunreach: 2066 err = -EHOSTUNREACH; 2067 goto done; 2068 2069 e_inval: 2070 err = -EINVAL; 2071 goto done; 2072 2073 e_nobufs: 2074 err = -ENOBUFS; 2075 goto done; 2076 2077 martian_source: 2078 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 2079 goto e_inval; 2080 } 2081 2082 int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, 2083 u8 tos, struct net_device *dev) 2084 { 2085 struct rtable * rth; 2086 unsigned hash; 2087 int iif = dev->ifindex; 2088 2089 tos &= IPTOS_RT_MASK; 2090 hash = rt_hash_code(daddr, saddr ^ (iif << 5)); 2091 2092 rcu_read_lock(); 2093 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2094 rth = rcu_dereference(rth->u.rt_next)) { 2095 if (rth->fl.fl4_dst == daddr && 2096 rth->fl.fl4_src == saddr && 2097 rth->fl.iif == iif && 2098 rth->fl.oif == 0 && 2099 #ifdef CONFIG_IP_ROUTE_FWMARK 2100 rth->fl.fl4_fwmark == skb->nfmark && 2101 #endif 2102 rth->fl.fl4_tos == tos) { 2103 rth->u.dst.lastuse = jiffies; 2104 dst_hold(&rth->u.dst); 2105 rth->u.dst.__use++; 2106 RT_CACHE_STAT_INC(in_hit); 2107 rcu_read_unlock(); 2108 skb->dst = (struct dst_entry*)rth; 2109 return 0; 2110 } 2111 RT_CACHE_STAT_INC(in_hlist_search); 2112 } 2113 rcu_read_unlock(); 2114 2115 /* Multicast recognition logic is moved from route cache to here. 2116 The problem was that too many Ethernet cards have broken/missing 2117 hardware multicast filters :-( As result the host on multicasting 2118 network acquires a lot of useless route cache entries, sort of 2119 SDR messages from all the world. Now we try to get rid of them. 2120 Really, provided software IP multicast filter is organized 2121 reasonably (at least, hashed), it does not result in a slowdown 2122 comparing with route cache reject entries. 2123 Note, that multicast routers are not affected, because 2124 route cache entry is created eventually. 2125 */ 2126 if (MULTICAST(daddr)) { 2127 struct in_device *in_dev; 2128 2129 rcu_read_lock(); 2130 if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { 2131 int our = ip_check_mc(in_dev, daddr, saddr, 2132 skb->nh.iph->protocol); 2133 if (our 2134 #ifdef CONFIG_IP_MROUTE 2135 || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev)) 2136 #endif 2137 ) { 2138 rcu_read_unlock(); 2139 return ip_route_input_mc(skb, daddr, saddr, 2140 tos, dev, our); 2141 } 2142 } 2143 rcu_read_unlock(); 2144 return -EINVAL; 2145 } 2146 return ip_route_input_slow(skb, daddr, saddr, tos, dev); 2147 } 2148 2149 static inline int __mkroute_output(struct rtable **result, 2150 struct fib_result* res, 2151 const struct flowi *fl, 2152 const struct flowi *oldflp, 2153 struct net_device *dev_out, 2154 unsigned flags) 2155 { 2156 struct rtable *rth; 2157 struct in_device *in_dev; 2158 u32 tos = RT_FL_TOS(oldflp); 2159 int err = 0; 2160 2161 if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK)) 2162 return -EINVAL; 2163 2164 if (fl->fl4_dst == 0xFFFFFFFF) 2165 res->type = RTN_BROADCAST; 2166 else if (MULTICAST(fl->fl4_dst)) 2167 res->type = RTN_MULTICAST; 2168 else if (BADCLASS(fl->fl4_dst) || ZERONET(fl->fl4_dst)) 2169 return -EINVAL; 2170 2171 if (dev_out->flags & IFF_LOOPBACK) 2172 flags |= RTCF_LOCAL; 2173 2174 /* get work reference to inet device */ 2175 in_dev = in_dev_get(dev_out); 2176 if (!in_dev) 2177 return -EINVAL; 2178 2179 if (res->type == RTN_BROADCAST) { 2180 flags |= RTCF_BROADCAST | RTCF_LOCAL; 2181 if (res->fi) { 2182 fib_info_put(res->fi); 2183 res->fi = NULL; 2184 } 2185 } else if (res->type == RTN_MULTICAST) { 2186 flags |= RTCF_MULTICAST|RTCF_LOCAL; 2187 if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, 2188 oldflp->proto)) 2189 flags &= ~RTCF_LOCAL; 2190 /* If multicast route do not exist use 2191 default one, but do not gateway in this case. 2192 Yes, it is hack. 2193 */ 2194 if (res->fi && res->prefixlen < 4) { 2195 fib_info_put(res->fi); 2196 res->fi = NULL; 2197 } 2198 } 2199 2200 2201 rth = dst_alloc(&ipv4_dst_ops); 2202 if (!rth) { 2203 err = -ENOBUFS; 2204 goto cleanup; 2205 } 2206 2207 atomic_set(&rth->u.dst.__refcnt, 1); 2208 rth->u.dst.flags= DST_HOST; 2209 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 2210 if (res->fi) { 2211 rth->rt_multipath_alg = res->fi->fib_mp_alg; 2212 if (res->fi->fib_nhs > 1) 2213 rth->u.dst.flags |= DST_BALANCED; 2214 } 2215 #endif 2216 if (in_dev->cnf.no_xfrm) 2217 rth->u.dst.flags |= DST_NOXFRM; 2218 if (in_dev->cnf.no_policy) 2219 rth->u.dst.flags |= DST_NOPOLICY; 2220 2221 rth->fl.fl4_dst = oldflp->fl4_dst; 2222 rth->fl.fl4_tos = tos; 2223 rth->fl.fl4_src = oldflp->fl4_src; 2224 rth->fl.oif = oldflp->oif; 2225 #ifdef CONFIG_IP_ROUTE_FWMARK 2226 rth->fl.fl4_fwmark= oldflp->fl4_fwmark; 2227 #endif 2228 rth->rt_dst = fl->fl4_dst; 2229 rth->rt_src = fl->fl4_src; 2230 rth->rt_iif = oldflp->oif ? : dev_out->ifindex; 2231 /* get references to the devices that are to be hold by the routing 2232 cache entry */ 2233 rth->u.dst.dev = dev_out; 2234 dev_hold(dev_out); 2235 rth->idev = in_dev_get(dev_out); 2236 rth->rt_gateway = fl->fl4_dst; 2237 rth->rt_spec_dst= fl->fl4_src; 2238 2239 rth->u.dst.output=ip_output; 2240 2241 RT_CACHE_STAT_INC(out_slow_tot); 2242 2243 if (flags & RTCF_LOCAL) { 2244 rth->u.dst.input = ip_local_deliver; 2245 rth->rt_spec_dst = fl->fl4_dst; 2246 } 2247 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 2248 rth->rt_spec_dst = fl->fl4_src; 2249 if (flags & RTCF_LOCAL && 2250 !(dev_out->flags & IFF_LOOPBACK)) { 2251 rth->u.dst.output = ip_mc_output; 2252 RT_CACHE_STAT_INC(out_slow_mc); 2253 } 2254 #ifdef CONFIG_IP_MROUTE 2255 if (res->type == RTN_MULTICAST) { 2256 if (IN_DEV_MFORWARD(in_dev) && 2257 !LOCAL_MCAST(oldflp->fl4_dst)) { 2258 rth->u.dst.input = ip_mr_input; 2259 rth->u.dst.output = ip_mc_output; 2260 } 2261 } 2262 #endif 2263 } 2264 2265 rt_set_nexthop(rth, res, 0); 2266 2267 rth->rt_flags = flags; 2268 2269 *result = rth; 2270 cleanup: 2271 /* release work reference to inet device */ 2272 in_dev_put(in_dev); 2273 2274 return err; 2275 } 2276 2277 static inline int ip_mkroute_output_def(struct rtable **rp, 2278 struct fib_result* res, 2279 const struct flowi *fl, 2280 const struct flowi *oldflp, 2281 struct net_device *dev_out, 2282 unsigned flags) 2283 { 2284 struct rtable *rth = NULL; 2285 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); 2286 unsigned hash; 2287 if (err == 0) { 2288 hash = rt_hash_code(oldflp->fl4_dst, 2289 oldflp->fl4_src ^ (oldflp->oif << 5)); 2290 err = rt_intern_hash(hash, rth, rp); 2291 } 2292 2293 return err; 2294 } 2295 2296 static inline int ip_mkroute_output(struct rtable** rp, 2297 struct fib_result* res, 2298 const struct flowi *fl, 2299 const struct flowi *oldflp, 2300 struct net_device *dev_out, 2301 unsigned flags) 2302 { 2303 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 2304 unsigned char hop; 2305 unsigned hash; 2306 int err = -EINVAL; 2307 struct rtable *rth = NULL; 2308 2309 if (res->fi && res->fi->fib_nhs > 1) { 2310 unsigned char hopcount = res->fi->fib_nhs; 2311 2312 for (hop = 0; hop < hopcount; hop++) { 2313 struct net_device *dev2nexthop; 2314 2315 res->nh_sel = hop; 2316 2317 /* hold a work reference to the output device */ 2318 dev2nexthop = FIB_RES_DEV(*res); 2319 dev_hold(dev2nexthop); 2320 2321 /* put reference to previous result */ 2322 if (hop) 2323 ip_rt_put(*rp); 2324 2325 err = __mkroute_output(&rth, res, fl, oldflp, 2326 dev2nexthop, flags); 2327 2328 if (err != 0) 2329 goto cleanup; 2330 2331 hash = rt_hash_code(oldflp->fl4_dst, 2332 oldflp->fl4_src ^ 2333 (oldflp->oif << 5)); 2334 err = rt_intern_hash(hash, rth, rp); 2335 2336 /* forward hop information to multipath impl. */ 2337 multipath_set_nhinfo(rth, 2338 FIB_RES_NETWORK(*res), 2339 FIB_RES_NETMASK(*res), 2340 res->prefixlen, 2341 &FIB_RES_NH(*res)); 2342 cleanup: 2343 /* release work reference to output device */ 2344 dev_put(dev2nexthop); 2345 2346 if (err != 0) 2347 return err; 2348 } 2349 return err; 2350 } else { 2351 return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, 2352 flags); 2353 } 2354 #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 2355 return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags); 2356 #endif 2357 } 2358 2359 /* 2360 * Major route resolver routine. 2361 */ 2362 2363 static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) 2364 { 2365 u32 tos = RT_FL_TOS(oldflp); 2366 struct flowi fl = { .nl_u = { .ip4_u = 2367 { .daddr = oldflp->fl4_dst, 2368 .saddr = oldflp->fl4_src, 2369 .tos = tos & IPTOS_RT_MASK, 2370 .scope = ((tos & RTO_ONLINK) ? 2371 RT_SCOPE_LINK : 2372 RT_SCOPE_UNIVERSE), 2373 #ifdef CONFIG_IP_ROUTE_FWMARK 2374 .fwmark = oldflp->fl4_fwmark 2375 #endif 2376 } }, 2377 .iif = loopback_dev.ifindex, 2378 .oif = oldflp->oif }; 2379 struct fib_result res; 2380 unsigned flags = 0; 2381 struct net_device *dev_out = NULL; 2382 int free_res = 0; 2383 int err; 2384 2385 2386 res.fi = NULL; 2387 #ifdef CONFIG_IP_MULTIPLE_TABLES 2388 res.r = NULL; 2389 #endif 2390 2391 if (oldflp->fl4_src) { 2392 err = -EINVAL; 2393 if (MULTICAST(oldflp->fl4_src) || 2394 BADCLASS(oldflp->fl4_src) || 2395 ZERONET(oldflp->fl4_src)) 2396 goto out; 2397 2398 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ 2399 dev_out = ip_dev_find(oldflp->fl4_src); 2400 if (dev_out == NULL) 2401 goto out; 2402 2403 /* I removed check for oif == dev_out->oif here. 2404 It was wrong for two reasons: 2405 1. ip_dev_find(saddr) can return wrong iface, if saddr is 2406 assigned to multiple interfaces. 2407 2. Moreover, we are allowed to send packets with saddr 2408 of another iface. --ANK 2409 */ 2410 2411 if (oldflp->oif == 0 2412 && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF)) { 2413 /* Special hack: user can direct multicasts 2414 and limited broadcast via necessary interface 2415 without fiddling with IP_MULTICAST_IF or IP_PKTINFO. 2416 This hack is not just for fun, it allows 2417 vic,vat and friends to work. 2418 They bind socket to loopback, set ttl to zero 2419 and expect that it will work. 2420 From the viewpoint of routing cache they are broken, 2421 because we are not allowed to build multicast path 2422 with loopback source addr (look, routing cache 2423 cannot know, that ttl is zero, so that packet 2424 will not leave this host and route is valid). 2425 Luckily, this hack is good workaround. 2426 */ 2427 2428 fl.oif = dev_out->ifindex; 2429 goto make_route; 2430 } 2431 if (dev_out) 2432 dev_put(dev_out); 2433 dev_out = NULL; 2434 } 2435 2436 2437 if (oldflp->oif) { 2438 dev_out = dev_get_by_index(oldflp->oif); 2439 err = -ENODEV; 2440 if (dev_out == NULL) 2441 goto out; 2442 2443 /* RACE: Check return value of inet_select_addr instead. */ 2444 if (__in_dev_get_rtnl(dev_out) == NULL) { 2445 dev_put(dev_out); 2446 goto out; /* Wrong error code */ 2447 } 2448 2449 if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF) { 2450 if (!fl.fl4_src) 2451 fl.fl4_src = inet_select_addr(dev_out, 0, 2452 RT_SCOPE_LINK); 2453 goto make_route; 2454 } 2455 if (!fl.fl4_src) { 2456 if (MULTICAST(oldflp->fl4_dst)) 2457 fl.fl4_src = inet_select_addr(dev_out, 0, 2458 fl.fl4_scope); 2459 else if (!oldflp->fl4_dst) 2460 fl.fl4_src = inet_select_addr(dev_out, 0, 2461 RT_SCOPE_HOST); 2462 } 2463 } 2464 2465 if (!fl.fl4_dst) { 2466 fl.fl4_dst = fl.fl4_src; 2467 if (!fl.fl4_dst) 2468 fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); 2469 if (dev_out) 2470 dev_put(dev_out); 2471 dev_out = &loopback_dev; 2472 dev_hold(dev_out); 2473 fl.oif = loopback_dev.ifindex; 2474 res.type = RTN_LOCAL; 2475 flags |= RTCF_LOCAL; 2476 goto make_route; 2477 } 2478 2479 if (fib_lookup(&fl, &res)) { 2480 res.fi = NULL; 2481 if (oldflp->oif) { 2482 /* Apparently, routing tables are wrong. Assume, 2483 that the destination is on link. 2484 2485 WHY? DW. 2486 Because we are allowed to send to iface 2487 even if it has NO routes and NO assigned 2488 addresses. When oif is specified, routing 2489 tables are looked up with only one purpose: 2490 to catch if destination is gatewayed, rather than 2491 direct. Moreover, if MSG_DONTROUTE is set, 2492 we send packet, ignoring both routing tables 2493 and ifaddr state. --ANK 2494 2495 2496 We could make it even if oif is unknown, 2497 likely IPv6, but we do not. 2498 */ 2499 2500 if (fl.fl4_src == 0) 2501 fl.fl4_src = inet_select_addr(dev_out, 0, 2502 RT_SCOPE_LINK); 2503 res.type = RTN_UNICAST; 2504 goto make_route; 2505 } 2506 if (dev_out) 2507 dev_put(dev_out); 2508 err = -ENETUNREACH; 2509 goto out; 2510 } 2511 free_res = 1; 2512 2513 if (res.type == RTN_LOCAL) { 2514 if (!fl.fl4_src) 2515 fl.fl4_src = fl.fl4_dst; 2516 if (dev_out) 2517 dev_put(dev_out); 2518 dev_out = &loopback_dev; 2519 dev_hold(dev_out); 2520 fl.oif = dev_out->ifindex; 2521 if (res.fi) 2522 fib_info_put(res.fi); 2523 res.fi = NULL; 2524 flags |= RTCF_LOCAL; 2525 goto make_route; 2526 } 2527 2528 #ifdef CONFIG_IP_ROUTE_MULTIPATH 2529 if (res.fi->fib_nhs > 1 && fl.oif == 0) 2530 fib_select_multipath(&fl, &res); 2531 else 2532 #endif 2533 if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) 2534 fib_select_default(&fl, &res); 2535 2536 if (!fl.fl4_src) 2537 fl.fl4_src = FIB_RES_PREFSRC(res); 2538 2539 if (dev_out) 2540 dev_put(dev_out); 2541 dev_out = FIB_RES_DEV(res); 2542 dev_hold(dev_out); 2543 fl.oif = dev_out->ifindex; 2544 2545 2546 make_route: 2547 err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); 2548 2549 2550 if (free_res) 2551 fib_res_put(&res); 2552 if (dev_out) 2553 dev_put(dev_out); 2554 out: return err; 2555 } 2556 2557 int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) 2558 { 2559 unsigned hash; 2560 struct rtable *rth; 2561 2562 hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5)); 2563 2564 rcu_read_lock_bh(); 2565 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2566 rth = rcu_dereference(rth->u.rt_next)) { 2567 if (rth->fl.fl4_dst == flp->fl4_dst && 2568 rth->fl.fl4_src == flp->fl4_src && 2569 rth->fl.iif == 0 && 2570 rth->fl.oif == flp->oif && 2571 #ifdef CONFIG_IP_ROUTE_FWMARK 2572 rth->fl.fl4_fwmark == flp->fl4_fwmark && 2573 #endif 2574 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2575 (IPTOS_RT_MASK | RTO_ONLINK))) { 2576 2577 /* check for multipath routes and choose one if 2578 * necessary 2579 */ 2580 if (multipath_select_route(flp, rth, rp)) { 2581 dst_hold(&(*rp)->u.dst); 2582 RT_CACHE_STAT_INC(out_hit); 2583 rcu_read_unlock_bh(); 2584 return 0; 2585 } 2586 2587 rth->u.dst.lastuse = jiffies; 2588 dst_hold(&rth->u.dst); 2589 rth->u.dst.__use++; 2590 RT_CACHE_STAT_INC(out_hit); 2591 rcu_read_unlock_bh(); 2592 *rp = rth; 2593 return 0; 2594 } 2595 RT_CACHE_STAT_INC(out_hlist_search); 2596 } 2597 rcu_read_unlock_bh(); 2598 2599 return ip_route_output_slow(rp, flp); 2600 } 2601 2602 EXPORT_SYMBOL_GPL(__ip_route_output_key); 2603 2604 int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags) 2605 { 2606 int err; 2607 2608 if ((err = __ip_route_output_key(rp, flp)) != 0) 2609 return err; 2610 2611 if (flp->proto) { 2612 if (!flp->fl4_src) 2613 flp->fl4_src = (*rp)->rt_src; 2614 if (!flp->fl4_dst) 2615 flp->fl4_dst = (*rp)->rt_dst; 2616 return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags); 2617 } 2618 2619 return 0; 2620 } 2621 2622 EXPORT_SYMBOL_GPL(ip_route_output_flow); 2623 2624 int ip_route_output_key(struct rtable **rp, struct flowi *flp) 2625 { 2626 return ip_route_output_flow(rp, flp, NULL, 0); 2627 } 2628 2629 static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 2630 int nowait, unsigned int flags) 2631 { 2632 struct rtable *rt = (struct rtable*)skb->dst; 2633 struct rtmsg *r; 2634 struct nlmsghdr *nlh; 2635 unsigned char *b = skb->tail; 2636 struct rta_cacheinfo ci; 2637 #ifdef CONFIG_IP_MROUTE 2638 struct rtattr *eptr; 2639 #endif 2640 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); 2641 r = NLMSG_DATA(nlh); 2642 r->rtm_family = AF_INET; 2643 r->rtm_dst_len = 32; 2644 r->rtm_src_len = 0; 2645 r->rtm_tos = rt->fl.fl4_tos; 2646 r->rtm_table = RT_TABLE_MAIN; 2647 r->rtm_type = rt->rt_type; 2648 r->rtm_scope = RT_SCOPE_UNIVERSE; 2649 r->rtm_protocol = RTPROT_UNSPEC; 2650 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; 2651 if (rt->rt_flags & RTCF_NOTIFY) 2652 r->rtm_flags |= RTM_F_NOTIFY; 2653 RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); 2654 if (rt->fl.fl4_src) { 2655 r->rtm_src_len = 32; 2656 RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src); 2657 } 2658 if (rt->u.dst.dev) 2659 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); 2660 #ifdef CONFIG_NET_CLS_ROUTE 2661 if (rt->u.dst.tclassid) 2662 RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid); 2663 #endif 2664 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 2665 if (rt->rt_multipath_alg != IP_MP_ALG_NONE) { 2666 __u32 alg = rt->rt_multipath_alg; 2667 2668 RTA_PUT(skb, RTA_MP_ALGO, 4, &alg); 2669 } 2670 #endif 2671 if (rt->fl.iif) 2672 RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); 2673 else if (rt->rt_src != rt->fl.fl4_src) 2674 RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); 2675 if (rt->rt_dst != rt->rt_gateway) 2676 RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); 2677 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2678 goto rtattr_failure; 2679 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); 2680 ci.rta_used = rt->u.dst.__use; 2681 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); 2682 if (rt->u.dst.expires) 2683 ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires - jiffies); 2684 else 2685 ci.rta_expires = 0; 2686 ci.rta_error = rt->u.dst.error; 2687 ci.rta_id = ci.rta_ts = ci.rta_tsage = 0; 2688 if (rt->peer) { 2689 ci.rta_id = rt->peer->ip_id_count; 2690 if (rt->peer->tcp_ts_stamp) { 2691 ci.rta_ts = rt->peer->tcp_ts; 2692 ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; 2693 } 2694 } 2695 #ifdef CONFIG_IP_MROUTE 2696 eptr = (struct rtattr*)skb->tail; 2697 #endif 2698 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); 2699 if (rt->fl.iif) { 2700 #ifdef CONFIG_IP_MROUTE 2701 u32 dst = rt->rt_dst; 2702 2703 if (MULTICAST(dst) && !LOCAL_MCAST(dst) && 2704 ipv4_devconf.mc_forwarding) { 2705 int err = ipmr_get_route(skb, r, nowait); 2706 if (err <= 0) { 2707 if (!nowait) { 2708 if (err == 0) 2709 return 0; 2710 goto nlmsg_failure; 2711 } else { 2712 if (err == -EMSGSIZE) 2713 goto nlmsg_failure; 2714 ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; 2715 } 2716 } 2717 } else 2718 #endif 2719 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); 2720 } 2721 2722 nlh->nlmsg_len = skb->tail - b; 2723 return skb->len; 2724 2725 nlmsg_failure: 2726 rtattr_failure: 2727 skb_trim(skb, b - skb->data); 2728 return -1; 2729 } 2730 2731 int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2732 { 2733 struct rtattr **rta = arg; 2734 struct rtmsg *rtm = NLMSG_DATA(nlh); 2735 struct rtable *rt = NULL; 2736 u32 dst = 0; 2737 u32 src = 0; 2738 int iif = 0; 2739 int err = -ENOBUFS; 2740 struct sk_buff *skb; 2741 2742 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2743 if (!skb) 2744 goto out; 2745 2746 /* Reserve room for dummy headers, this skb can pass 2747 through good chunk of routing engine. 2748 */ 2749 skb->mac.raw = skb->nh.raw = skb->data; 2750 2751 /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */ 2752 skb->nh.iph->protocol = IPPROTO_ICMP; 2753 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); 2754 2755 if (rta[RTA_SRC - 1]) 2756 memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4); 2757 if (rta[RTA_DST - 1]) 2758 memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4); 2759 if (rta[RTA_IIF - 1]) 2760 memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int)); 2761 2762 if (iif) { 2763 struct net_device *dev = __dev_get_by_index(iif); 2764 err = -ENODEV; 2765 if (!dev) 2766 goto out_free; 2767 skb->protocol = htons(ETH_P_IP); 2768 skb->dev = dev; 2769 local_bh_disable(); 2770 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); 2771 local_bh_enable(); 2772 rt = (struct rtable*)skb->dst; 2773 if (!err && rt->u.dst.error) 2774 err = -rt->u.dst.error; 2775 } else { 2776 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, 2777 .saddr = src, 2778 .tos = rtm->rtm_tos } } }; 2779 int oif = 0; 2780 if (rta[RTA_OIF - 1]) 2781 memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); 2782 fl.oif = oif; 2783 err = ip_route_output_key(&rt, &fl); 2784 } 2785 if (err) 2786 goto out_free; 2787 2788 skb->dst = &rt->u.dst; 2789 if (rtm->rtm_flags & RTM_F_NOTIFY) 2790 rt->rt_flags |= RTCF_NOTIFY; 2791 2792 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; 2793 2794 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2795 RTM_NEWROUTE, 0, 0); 2796 if (!err) 2797 goto out_free; 2798 if (err < 0) { 2799 err = -EMSGSIZE; 2800 goto out_free; 2801 } 2802 2803 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 2804 if (err > 0) 2805 err = 0; 2806 out: return err; 2807 2808 out_free: 2809 kfree_skb(skb); 2810 goto out; 2811 } 2812 2813 int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) 2814 { 2815 struct rtable *rt; 2816 int h, s_h; 2817 int idx, s_idx; 2818 2819 s_h = cb->args[0]; 2820 s_idx = idx = cb->args[1]; 2821 for (h = 0; h <= rt_hash_mask; h++) { 2822 if (h < s_h) continue; 2823 if (h > s_h) 2824 s_idx = 0; 2825 rcu_read_lock_bh(); 2826 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; 2827 rt = rcu_dereference(rt->u.rt_next), idx++) { 2828 if (idx < s_idx) 2829 continue; 2830 skb->dst = dst_clone(&rt->u.dst); 2831 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 2832 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 2833 1, NLM_F_MULTI) <= 0) { 2834 dst_release(xchg(&skb->dst, NULL)); 2835 rcu_read_unlock_bh(); 2836 goto done; 2837 } 2838 dst_release(xchg(&skb->dst, NULL)); 2839 } 2840 rcu_read_unlock_bh(); 2841 } 2842 2843 done: 2844 cb->args[0] = h; 2845 cb->args[1] = idx; 2846 return skb->len; 2847 } 2848 2849 void ip_rt_multicast_event(struct in_device *in_dev) 2850 { 2851 rt_cache_flush(0); 2852 } 2853 2854 #ifdef CONFIG_SYSCTL 2855 static int flush_delay; 2856 2857 static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, 2858 struct file *filp, void __user *buffer, 2859 size_t *lenp, loff_t *ppos) 2860 { 2861 if (write) { 2862 proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 2863 rt_cache_flush(flush_delay); 2864 return 0; 2865 } 2866 2867 return -EINVAL; 2868 } 2869 2870 static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, 2871 int __user *name, 2872 int nlen, 2873 void __user *oldval, 2874 size_t __user *oldlenp, 2875 void __user *newval, 2876 size_t newlen, 2877 void **context) 2878 { 2879 int delay; 2880 if (newlen != sizeof(int)) 2881 return -EINVAL; 2882 if (get_user(delay, (int __user *)newval)) 2883 return -EFAULT; 2884 rt_cache_flush(delay); 2885 return 0; 2886 } 2887 2888 ctl_table ipv4_route_table[] = { 2889 { 2890 .ctl_name = NET_IPV4_ROUTE_FLUSH, 2891 .procname = "flush", 2892 .data = &flush_delay, 2893 .maxlen = sizeof(int), 2894 .mode = 0200, 2895 .proc_handler = &ipv4_sysctl_rtcache_flush, 2896 .strategy = &ipv4_sysctl_rtcache_flush_strategy, 2897 }, 2898 { 2899 .ctl_name = NET_IPV4_ROUTE_MIN_DELAY, 2900 .procname = "min_delay", 2901 .data = &ip_rt_min_delay, 2902 .maxlen = sizeof(int), 2903 .mode = 0644, 2904 .proc_handler = &proc_dointvec_jiffies, 2905 .strategy = &sysctl_jiffies, 2906 }, 2907 { 2908 .ctl_name = NET_IPV4_ROUTE_MAX_DELAY, 2909 .procname = "max_delay", 2910 .data = &ip_rt_max_delay, 2911 .maxlen = sizeof(int), 2912 .mode = 0644, 2913 .proc_handler = &proc_dointvec_jiffies, 2914 .strategy = &sysctl_jiffies, 2915 }, 2916 { 2917 .ctl_name = NET_IPV4_ROUTE_GC_THRESH, 2918 .procname = "gc_thresh", 2919 .data = &ipv4_dst_ops.gc_thresh, 2920 .maxlen = sizeof(int), 2921 .mode = 0644, 2922 .proc_handler = &proc_dointvec, 2923 }, 2924 { 2925 .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, 2926 .procname = "max_size", 2927 .data = &ip_rt_max_size, 2928 .maxlen = sizeof(int), 2929 .mode = 0644, 2930 .proc_handler = &proc_dointvec, 2931 }, 2932 { 2933 /* Deprecated. Use gc_min_interval_ms */ 2934 2935 .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL, 2936 .procname = "gc_min_interval", 2937 .data = &ip_rt_gc_min_interval, 2938 .maxlen = sizeof(int), 2939 .mode = 0644, 2940 .proc_handler = &proc_dointvec_jiffies, 2941 .strategy = &sysctl_jiffies, 2942 }, 2943 { 2944 .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, 2945 .procname = "gc_min_interval_ms", 2946 .data = &ip_rt_gc_min_interval, 2947 .maxlen = sizeof(int), 2948 .mode = 0644, 2949 .proc_handler = &proc_dointvec_ms_jiffies, 2950 .strategy = &sysctl_ms_jiffies, 2951 }, 2952 { 2953 .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, 2954 .procname = "gc_timeout", 2955 .data = &ip_rt_gc_timeout, 2956 .maxlen = sizeof(int), 2957 .mode = 0644, 2958 .proc_handler = &proc_dointvec_jiffies, 2959 .strategy = &sysctl_jiffies, 2960 }, 2961 { 2962 .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, 2963 .procname = "gc_interval", 2964 .data = &ip_rt_gc_interval, 2965 .maxlen = sizeof(int), 2966 .mode = 0644, 2967 .proc_handler = &proc_dointvec_jiffies, 2968 .strategy = &sysctl_jiffies, 2969 }, 2970 { 2971 .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, 2972 .procname = "redirect_load", 2973 .data = &ip_rt_redirect_load, 2974 .maxlen = sizeof(int), 2975 .mode = 0644, 2976 .proc_handler = &proc_dointvec, 2977 }, 2978 { 2979 .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, 2980 .procname = "redirect_number", 2981 .data = &ip_rt_redirect_number, 2982 .maxlen = sizeof(int), 2983 .mode = 0644, 2984 .proc_handler = &proc_dointvec, 2985 }, 2986 { 2987 .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, 2988 .procname = "redirect_silence", 2989 .data = &ip_rt_redirect_silence, 2990 .maxlen = sizeof(int), 2991 .mode = 0644, 2992 .proc_handler = &proc_dointvec, 2993 }, 2994 { 2995 .ctl_name = NET_IPV4_ROUTE_ERROR_COST, 2996 .procname = "error_cost", 2997 .data = &ip_rt_error_cost, 2998 .maxlen = sizeof(int), 2999 .mode = 0644, 3000 .proc_handler = &proc_dointvec, 3001 }, 3002 { 3003 .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, 3004 .procname = "error_burst", 3005 .data = &ip_rt_error_burst, 3006 .maxlen = sizeof(int), 3007 .mode = 0644, 3008 .proc_handler = &proc_dointvec, 3009 }, 3010 { 3011 .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, 3012 .procname = "gc_elasticity", 3013 .data = &ip_rt_gc_elasticity, 3014 .maxlen = sizeof(int), 3015 .mode = 0644, 3016 .proc_handler = &proc_dointvec, 3017 }, 3018 { 3019 .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, 3020 .procname = "mtu_expires", 3021 .data = &ip_rt_mtu_expires, 3022 .maxlen = sizeof(int), 3023 .mode = 0644, 3024 .proc_handler = &proc_dointvec_jiffies, 3025 .strategy = &sysctl_jiffies, 3026 }, 3027 { 3028 .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, 3029 .procname = "min_pmtu", 3030 .data = &ip_rt_min_pmtu, 3031 .maxlen = sizeof(int), 3032 .mode = 0644, 3033 .proc_handler = &proc_dointvec, 3034 }, 3035 { 3036 .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, 3037 .procname = "min_adv_mss", 3038 .data = &ip_rt_min_advmss, 3039 .maxlen = sizeof(int), 3040 .mode = 0644, 3041 .proc_handler = &proc_dointvec, 3042 }, 3043 { 3044 .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, 3045 .procname = "secret_interval", 3046 .data = &ip_rt_secret_interval, 3047 .maxlen = sizeof(int), 3048 .mode = 0644, 3049 .proc_handler = &proc_dointvec_jiffies, 3050 .strategy = &sysctl_jiffies, 3051 }, 3052 { .ctl_name = 0 } 3053 }; 3054 #endif 3055 3056 #ifdef CONFIG_NET_CLS_ROUTE 3057 struct ip_rt_acct *ip_rt_acct; 3058 3059 /* This code sucks. But you should have seen it before! --RR */ 3060 3061 /* IP route accounting ptr for this logical cpu number. */ 3062 #define IP_RT_ACCT_CPU(i) (ip_rt_acct + i * 256) 3063 3064 #ifdef CONFIG_PROC_FS 3065 static int ip_rt_acct_read(char *buffer, char **start, off_t offset, 3066 int length, int *eof, void *data) 3067 { 3068 unsigned int i; 3069 3070 if ((offset & 3) || (length & 3)) 3071 return -EIO; 3072 3073 if (offset >= sizeof(struct ip_rt_acct) * 256) { 3074 *eof = 1; 3075 return 0; 3076 } 3077 3078 if (offset + length >= sizeof(struct ip_rt_acct) * 256) { 3079 length = sizeof(struct ip_rt_acct) * 256 - offset; 3080 *eof = 1; 3081 } 3082 3083 offset /= sizeof(u32); 3084 3085 if (length > 0) { 3086 u32 *src = ((u32 *) IP_RT_ACCT_CPU(0)) + offset; 3087 u32 *dst = (u32 *) buffer; 3088 3089 /* Copy first cpu. */ 3090 *start = buffer; 3091 memcpy(dst, src, length); 3092 3093 /* Add the other cpus in, one int at a time */ 3094 for_each_possible_cpu(i) { 3095 unsigned int j; 3096 3097 src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset; 3098 3099 for (j = 0; j < length/4; j++) 3100 dst[j] += src[j]; 3101 } 3102 } 3103 return length; 3104 } 3105 #endif /* CONFIG_PROC_FS */ 3106 #endif /* CONFIG_NET_CLS_ROUTE */ 3107 3108 static __initdata unsigned long rhash_entries; 3109 static int __init set_rhash_entries(char *str) 3110 { 3111 if (!str) 3112 return 0; 3113 rhash_entries = simple_strtoul(str, &str, 0); 3114 return 1; 3115 } 3116 __setup("rhash_entries=", set_rhash_entries); 3117 3118 int __init ip_rt_init(void) 3119 { 3120 int rc = 0; 3121 3122 rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ 3123 (jiffies ^ (jiffies >> 7))); 3124 3125 #ifdef CONFIG_NET_CLS_ROUTE 3126 { 3127 int order; 3128 for (order = 0; 3129 (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++) 3130 /* NOTHING */; 3131 ip_rt_acct = (struct ip_rt_acct *)__get_free_pages(GFP_KERNEL, order); 3132 if (!ip_rt_acct) 3133 panic("IP: failed to allocate ip_rt_acct\n"); 3134 memset(ip_rt_acct, 0, PAGE_SIZE << order); 3135 } 3136 #endif 3137 3138 ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", 3139 sizeof(struct rtable), 3140 0, SLAB_HWCACHE_ALIGN, 3141 NULL, NULL); 3142 3143 if (!ipv4_dst_ops.kmem_cachep) 3144 panic("IP: failed to allocate ip_dst_cache\n"); 3145 3146 rt_hash_table = (struct rt_hash_bucket *) 3147 alloc_large_system_hash("IP route cache", 3148 sizeof(struct rt_hash_bucket), 3149 rhash_entries, 3150 (num_physpages >= 128 * 1024) ? 3151 15 : 17, 3152 HASH_HIGHMEM, 3153 &rt_hash_log, 3154 &rt_hash_mask, 3155 0); 3156 memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); 3157 rt_hash_lock_init(); 3158 3159 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); 3160 ip_rt_max_size = (rt_hash_mask + 1) * 16; 3161 3162 devinet_init(); 3163 ip_fib_init(); 3164 3165 init_timer(&rt_flush_timer); 3166 rt_flush_timer.function = rt_run_flush; 3167 init_timer(&rt_periodic_timer); 3168 rt_periodic_timer.function = rt_check_expire; 3169 init_timer(&rt_secret_timer); 3170 rt_secret_timer.function = rt_secret_rebuild; 3171 3172 /* All the timers, started at system startup tend 3173 to synchronize. Perturb it a bit. 3174 */ 3175 rt_periodic_timer.expires = jiffies + net_random() % ip_rt_gc_interval + 3176 ip_rt_gc_interval; 3177 add_timer(&rt_periodic_timer); 3178 3179 rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + 3180 ip_rt_secret_interval; 3181 add_timer(&rt_secret_timer); 3182 3183 #ifdef CONFIG_PROC_FS 3184 { 3185 struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */ 3186 if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || 3187 !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, 3188 proc_net_stat))) { 3189 return -ENOMEM; 3190 } 3191 rtstat_pde->proc_fops = &rt_cpu_seq_fops; 3192 } 3193 #ifdef CONFIG_NET_CLS_ROUTE 3194 create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL); 3195 #endif 3196 #endif 3197 #ifdef CONFIG_XFRM 3198 xfrm_init(); 3199 xfrm4_init(); 3200 #endif 3201 return rc; 3202 } 3203 3204 EXPORT_SYMBOL(__ip_select_ident); 3205 EXPORT_SYMBOL(ip_route_input); 3206 EXPORT_SYMBOL(ip_route_output_key); 3207