/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: FIB frontend.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>

#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>

#ifndef CONFIG_IP_MULTIPLE_TABLES

static int __net_init fib4_rules_init(struct net *net)
{
	struct fib_table *local_table, *main_table;

	local_table = fib_hash_table(RT_TABLE_LOCAL);
	if (local_table == NULL)
		return -ENOMEM;

	main_table = fib_hash_table(RT_TABLE_MAIN);
	if (main_table == NULL)
		goto fail;

	hlist_add_head_rcu(&local_table->tb_hlist,
				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
	hlist_add_head_rcu(&main_table->tb_hlist,
				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
	return 0;

fail:
	kfree(local_table);
	return -ENOMEM;
}
#else

struct fib_table *fib_new_table(struct net *net, u32 id)
{
	struct fib_table *tb;
	unsigned int h;

	if (id == 0)
		id = RT_TABLE_MAIN;
	tb = fib_get_table(net, id);
	if (tb)
		return tb;

	tb = fib_hash_table(id);
	if (!tb)
		return NULL;
	h = id & (FIB_TABLE_HASHSZ - 1);
	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
	return tb;
}

struct fib_table *fib_get_table(struct net *net, u32 id)
{
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	unsigned int h;

	if (id == 0)
		id = RT_TABLE_MAIN;
	h = id & (FIB_TABLE_HASHSZ - 1);

	rcu_read_lock();
	head = &net->ipv4.fib_table_hash[h];
	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
		if (tb->tb_id == id) {
			rcu_read_unlock();
			return tb;
		}
	}
	rcu_read_unlock();
	return NULL;
}
#endif /* CONFIG_IP_MULTIPLE_TABLES */

void fib_select_default(struct net *net,
			const struct flowi *flp, struct fib_result *res)
{
	struct fib_table *tb;
	int table = RT_TABLE_MAIN;
#ifdef CONFIG_IP_MULTIPLE_TABLES
	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
		return;
	table = res->r->table;
#endif
	tb = fib_get_table(net, table);
	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
		fib_table_select_default(tb, flp, res);
}
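
/*
 * fib_flush() is called after nexthops have been marked dead (see the
 * fib_sync_down_dev()/fib_sync_down_addr() callers below); it walks every
 * table in the per-net hash and flushes the route cache only if at least
 * one entry was actually removed.
 */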

static void fib_flush(struct net *net)
{
	int flushed = 0;
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	unsigned int h;

	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
		head = &net->ipv4.fib_table_hash[h];
		hlist_for_each_entry(tb, node, head, tb_hlist)
			flushed += fib_table_flush(tb);
	}

	if (flushed)
		rt_cache_flush(net, -1);
}

/*
 *	Find the first device with a given source address.
 */

struct net_device *ip_dev_find(struct net *net, __be32 addr)
{
	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
	struct fib_result res;
	struct net_device *dev = NULL;
	struct fib_table *local_table;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	local_table = fib_get_table(net, RT_TABLE_LOCAL);
	if (!local_table || fib_table_lookup(local_table, &fl, &res))
		return NULL;
	if (res.type != RTN_LOCAL)
		goto out;
	dev = FIB_RES_DEV(res);

	if (dev)
		dev_hold(dev);
out:
	fib_res_put(&res);
	return dev;
}
EXPORT_SYMBOL(ip_dev_find);

/*
 * Find the address type as if only "dev" were present in the system. If
 * "dev" is NULL, all interfaces are taken into consideration.
 */
static inline unsigned __inet_dev_addr_type(struct net *net,
					    const struct net_device *dev,
					    __be32 addr)
{
	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
	struct fib_result res;
	unsigned ret = RTN_BROADCAST;
	struct fib_table *local_table;

	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
		return RTN_BROADCAST;
	if (ipv4_is_multicast(addr))
		return RTN_MULTICAST;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	local_table = fib_get_table(net, RT_TABLE_LOCAL);
	if (local_table) {
		ret = RTN_UNICAST;
		if (!fib_table_lookup(local_table, &fl, &res)) {
			if (!dev || dev == res.fi->fib_dev)
				ret = res.type;
			fib_res_put(&res);
		}
	}
	return ret;
}

unsigned int inet_addr_type(struct net *net, __be32 addr)
{
	return __inet_dev_addr_type(net, NULL, addr);
}
EXPORT_SYMBOL(inet_addr_type);

unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
				__be32 addr)
{
	return __inet_dev_addr_type(net, dev, addr);
}
EXPORT_SYMBOL(inet_dev_addr_type);

/* Given (packet source, input interface) and optional (dst, oif, tos):
   - (main) check that the source is valid, i.e. not a broadcast address
     and not one of our local addresses.
   - figure out which "logical" interface this packet arrived on
     and calculate the "specific destination" address.
   - check that the packet arrived on the expected physical interface.
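
   Returns -EINVAL when the device has no IP configuration or the source
   resolves to something other than a unicast (or, with accept_local, a
   local) address; -EXDEV when reverse-path filtering (rp_filter) rejects
   the packet because the best route back to the source does not use the
   receiving interface; otherwise a non-negative value.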
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst,
			u32 *itag, u32 mark)
{
	struct in_device *in_dev;
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .mark = mark,
			    .iif = oif };

	struct fib_result res;
	int no_addr, rpf, accept_local;
	int ret;
	struct net *net;

	no_addr = rpf = accept_local = 0;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
		if (mark && !IN_DEV_SRC_VMARK(in_dev))
			fl.mark = 0;
	}
	rcu_read_unlock();

	if (in_dev == NULL)
		goto e_inval;

	net = dev_net(dev);
	if (fib_lookup(net, &fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST) {
		if (res.type != RTN_LOCAL || !accept_local)
			goto e_inval_res;
	}
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	if (rpf == 1)
		goto e_rpf;
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(net, &fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	if (rpf)
		goto e_rpf;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
e_rpf:
	return -EXDEV;
}

static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
}

static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
{
	struct nlattr *nla;

	nla = (struct nlattr *) ((char *) mx + len);
	nla->nla_type = type;
	nla->nla_len = nla_attr_size(4);
	*(u32 *) nla_data(nla) = value;

	return len + nla_total_size(4);
}

static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));
	cfg->fc_nlinfo.nl_net = net;

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if the application forgot to set the correct family (AF_INET),
	 *    reject the request unless it is absolutely clear, i.e.
	 *    both family and mask are zero.
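	 *
	 *    For example, a genmask of 255.255.255.0 yields plen 24, while a
	 *    non-contiguous mask such as 255.0.255.0 (or a destination with
	 *    host bits set under the mask) fails bad_mask() and the request
	 *    is rejected with -EINVAL.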
	 */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(net, addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}

/*
 * Handle IP routing ioctl calls.
 * These are used to manipulate the routing tables.
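 *
 * SIOCADDRT and SIOCDELRT are the ioctls used by the legacy route(8)
 * utility to add and delete routes. The metrics buffer that
 * rtentry_to_fib_config() may place in cfg->fc_mx is kzalloc()ed and is
 * freed here once the table operation completes.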
 */
int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib_config cfg;
	struct rtentry rt;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;

		if (copy_from_user(&rt, arg, sizeof(rt)))
			return -EFAULT;

		rtnl_lock();
		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
		if (err == 0) {
			struct fib_table *tb;

			if (cmd == SIOCDELRT) {
				tb = fib_get_table(net, cfg.fc_table);
				if (tb)
					err = fib_table_delete(tb, &cfg);
				else
					err = -ESRCH;
			} else {
				tb = fib_new_table(net, cfg.fc_table);
				if (tb)
					err = fib_table_insert(tb, &cfg);
				else
					err = -ENOBUFS;
			}

			/* allocated by rtentry_to_fib_config() */
			kfree(cfg.fc_mx);
		}
		rtnl_unlock();
		return err;
	}
	return -EINVAL;
}

const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
	[RTA_DST]		= { .type = NLA_U32 },
	[RTA_SRC]		= { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_GATEWAY]		= { .type = NLA_U32 },
	[RTA_PRIORITY]		= { .type = NLA_U32 },
	[RTA_PREFSRC]		= { .type = NLA_U32 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_FLOW]		= { .type = NLA_U32 },
};

static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
			     struct nlmsghdr *nlh, struct fib_config *cfg)
{
	struct nlattr *attr;
	int err, remaining;
	struct rtmsg *rtm;

	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	memset(cfg, 0, sizeof(*cfg));

	rtm = nlmsg_data(nlh);
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_tos = rtm->rtm_tos;
	cfg->fc_table = rtm->rtm_table;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_scope = rtm->rtm_scope;
	cfg->fc_type = rtm->rtm_type;
	cfg->fc_flags = rtm->rtm_flags;
	cfg->fc_nlflags = nlh->nlmsg_flags;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = net;

	if (cfg->fc_type > RTN_MAX) {
		err = -EINVAL;
		goto errout;
	}

	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
		switch (nla_type(attr)) {
		case RTA_DST:
			cfg->fc_dst = nla_get_be32(attr);
			break;
		case RTA_OIF:
			cfg->fc_oif = nla_get_u32(attr);
			break;
		case RTA_GATEWAY:
			cfg->fc_gw = nla_get_be32(attr);
			break;
		case RTA_PRIORITY:
			cfg->fc_priority = nla_get_u32(attr);
			break;
		case RTA_PREFSRC:
			cfg->fc_prefsrc = nla_get_be32(attr);
			break;
		case RTA_METRICS:
			cfg->fc_mx = nla_data(attr);
			cfg->fc_mx_len = nla_len(attr);
			break;
		case RTA_MULTIPATH:
			cfg->fc_mp = nla_data(attr);
			cfg->fc_mp_len = nla_len(attr);
			break;
		case RTA_FLOW:
			cfg->fc_flow = nla_get_u32(attr);
			break;
		case RTA_TABLE:
			cfg->fc_table = nla_get_u32(attr);
			break;
		}
	}

	return 0;
errout:
	return err;
}

static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct fib_config cfg;
	struct fib_table *tb;
	int err;

	err = rtm_to_fib_config(net, skb, nlh, &cfg);
	if (err < 0)
		goto errout;

	tb = fib_get_table(net, cfg.fc_table);
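	/* A delete must not create a missing table (cf. fib_new_table() in
	 * inet_rtm_newroute() below); if it is absent there is nothing to
	 * delete. */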
	if (tb == NULL) {
		err = -ESRCH;
		goto errout;
	}

	err = fib_table_delete(tb, &cfg);
errout:
	return err;
}

static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct fib_config cfg;
	struct fib_table *tb;
	int err;

	err = rtm_to_fib_config(net, skb, nlh, &cfg);
	if (err < 0)
		goto errout;

	tb = fib_new_table(net, cfg.fc_table);
	if (tb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	err = fib_table_insert(tb, &cfg);
errout:
	return err;
}

static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	int dumped = 0;

	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
		return ip_rt_dump(skb, cb);

	s_h = cb->args[0];
	s_e = cb->args[1];

	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
		e = 0;
		head = &net->ipv4.fib_table_hash[h];
		hlist_for_each_entry(tb, node, head, tb_hlist) {
			if (e < s_e)
				goto next;
			if (dumped)
				memset(&cb->args[2], 0, sizeof(cb->args) -
						 2 * sizeof(cb->args[0]));
			if (fib_table_dump(tb, skb, cb) < 0)
				goto out;
			dumped = 1;
next:
			e++;
		}
	}
out:
	cb->args[1] = e;
	cb->args[0] = h;

	return skb->len;
}

/* Prepare and feed an intra-kernel routing request.
   Really, this should be a netlink message, but :-( netlink
   may not be configured, so we feed it directly to the
   fib engine. This is legal, because all events occur
   only while netlink is already locked.
 */

static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
	struct net *net = dev_net(ifa->ifa_dev->dev);
	struct fib_table *tb;
	struct fib_config cfg = {
		.fc_protocol = RTPROT_KERNEL,
		.fc_type = type,
		.fc_dst = dst,
		.fc_dst_len = dst_len,
		.fc_prefsrc = ifa->ifa_local,
		.fc_oif = ifa->ifa_dev->dev->ifindex,
		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
		.fc_nlinfo = {
			.nl_net = net,
		},
	};

	if (type == RTN_UNICAST)
		tb = fib_new_table(net, RT_TABLE_MAIN);
	else
		tb = fib_new_table(net, RT_TABLE_LOCAL);

	if (tb == NULL)
		return;

	cfg.fc_table = tb->tb_id;

	if (type != RTN_LOCAL)
		cfg.fc_scope = RT_SCOPE_LINK;
	else
		cfg.fc_scope = RT_SCOPE_HOST;

	if (cmd == RTM_NEWROUTE)
		fib_table_insert(tb, &cfg);
	else
		fib_table_delete(tb, &cfg);
}

void fib_add_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *prim = ifa;
	__be32 mask = ifa->ifa_mask;
	__be32 addr = ifa->ifa_local;
	__be32 prefix = ifa->ifa_address & mask;

	if (ifa->ifa_flags & IFA_F_SECONDARY) {
		prim = inet_ifa_byprefix(in_dev, prefix, mask);
		if (prim == NULL) {
			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);

	if (!(dev->flags & IFF_UP))
		return;

	/* Add the broadcast address, if it is explicitly assigned. */
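	/*
	 * The limited broadcast address 255.255.255.255 is never added here;
	 * __inet_dev_addr_type() above already classifies it as
	 * RTN_BROADCAST unconditionally.
	 */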
	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);

	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
		fib_magic(RTM_NEWROUTE, dev->flags & IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);

		/* Add network-specific broadcasts, where they make sense */
		if (ifa->ifa_prefixlen < 31) {
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 32, prim);
		}
	}
}

static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	__be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
	__be32 any = ifa->ifa_address & ifa->ifa_mask;
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE, dev->flags & IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
	else {
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than addition.
	   We have to take care not to delete too much :-)

	   Scan the address list to make sure the addresses are really gone.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	if (!(ok & BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok & BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok & BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok & LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check that this local address has finally disappeared. */
		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
			/* Last, but not least: we must flush stray FIB
			   entries.

			   First of all, we scan the fib_info list searching
			   for stray nexthop entries, then ignite fib_flush.
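
			   If fib_sync_down_addr() reports that it found such
			   stray entries referencing this address, fib_flush()
			   then removes them from every table.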
			*/
			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
				fib_flush(dev_net(dev));
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}

static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
{
	struct fib_result res;
	struct flowi fl = { .mark = frn->fl_mark,
			    .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
						 .tos = frn->fl_tos,
						 .scope = frn->fl_scope } } };

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	frn->err = -ENOENT;
	if (tb) {
		local_bh_disable();

		frn->tb_id = tb->tb_id;
		frn->err = fib_table_lookup(tb, &fl, &res);

		if (!frn->err) {
			frn->prefixlen = res.prefixlen;
			frn->nh_sel = res.nh_sel;
			frn->type = res.type;
			frn->scope = res.scope;
			fib_res_put(&res);
		}
		local_bh_enable();
	}
}

static void nl_fib_input(struct sk_buff *skb)
{
	struct net *net;
	struct fib_result_nl *frn;
	struct nlmsghdr *nlh;
	struct fib_table *tb;
	u32 pid;

	net = sock_net(skb->sk);
	nlh = nlmsg_hdr(skb);
	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
		return;

	skb = skb_clone(skb, GFP_KERNEL);
	if (skb == NULL)
		return;
	nlh = nlmsg_hdr(skb);

	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
	tb = fib_get_table(net, frn->tb_id_in);

	nl_fib_lookup(frn, tb);

	pid = NETLINK_CB(skb).pid;	/* pid of sending process */
	NETLINK_CB(skb).pid = 0;	/* from kernel */
	NETLINK_CB(skb).dst_group = 0;	/* unicast */
	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
}

static int __net_init nl_fib_lookup_init(struct net *net)
{
	struct sock *sk;
	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
				   nl_fib_input, NULL, THIS_MODULE);
	if (sk == NULL)
		return -EAFNOSUPPORT;
	net->ipv4.fibnl = sk;
	return 0;
}

static void nl_fib_lookup_exit(struct net *net)
{
	netlink_kernel_release(net->ipv4.fibnl);
	net->ipv4.fibnl = NULL;
}

static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	if (fib_sync_down_dev(dev, force))
		fib_flush(dev_net(dev));
	rt_cache_flush(dev_net(dev), delay);
	arp_ifdown(dev);
}

static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;

	switch (event) {
	case NETDEV_UP:
		fib_add_ifaddr(ifa);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(dev);
#endif
		rt_cache_flush(dev_net(dev), -1);
		break;
	case NETDEV_DOWN:
		fib_del_ifaddr(ifa);
		if (ifa->ifa_dev->ifa_list == NULL) {
			/* Last address was deleted from this interface.
			   Disable IP.
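
			   fib_disable_ip() flushes the routes that use this
			   device, purges the routing cache and shuts down
			   ARP on it.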
			 */
			fib_disable_ip(dev, 1, 0);
		} else {
			rt_cache_flush(dev_net(dev), -1);
		}
		break;
	}
	return NOTIFY_DONE;
}

static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	if (event == NETDEV_UNREGISTER) {
		fib_disable_ip(dev, 2, -1);
		return NOTIFY_DONE;
	}

	if (!in_dev)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		for_ifa(in_dev) {
			fib_add_ifaddr(ifa);
		} endfor_ifa(in_dev);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(dev);
#endif
		rt_cache_flush(dev_net(dev), -1);
		break;
	case NETDEV_DOWN:
		fib_disable_ip(dev, 0, 0);
		break;
	case NETDEV_CHANGEMTU:
	case NETDEV_CHANGE:
		rt_cache_flush(dev_net(dev), 0);
		break;
	case NETDEV_UNREGISTER_BATCH:
		rt_cache_flush_batch();
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block fib_inetaddr_notifier = {
	.notifier_call = fib_inetaddr_event,
};

static struct notifier_block fib_netdev_notifier = {
	.notifier_call = fib_netdev_event,
};

static int __net_init ip_fib_net_init(struct net *net)
{
	int err;
	unsigned int i;

	net->ipv4.fib_table_hash = kzalloc(
			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
	if (net->ipv4.fib_table_hash == NULL)
		return -ENOMEM;

	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);

	err = fib4_rules_init(net);
	if (err < 0)
		goto fail;
	return 0;

fail:
	kfree(net->ipv4.fib_table_hash);
	return err;
}

static void ip_fib_net_exit(struct net *net)
{
	unsigned int i;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	fib4_rules_exit(net);
#endif

	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
		struct fib_table *tb;
		struct hlist_head *head;
		struct hlist_node *node, *tmp;

		head = &net->ipv4.fib_table_hash[i];
		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
			hlist_del(node);
			fib_table_flush(tb);
			kfree(tb);
		}
	}
	kfree(net->ipv4.fib_table_hash);
}

static int __net_init fib_net_init(struct net *net)
{
	int error;

	error = ip_fib_net_init(net);
	if (error < 0)
		goto out;
	error = nl_fib_lookup_init(net);
	if (error < 0)
		goto out_nlfl;
	error = fib_proc_init(net);
	if (error < 0)
		goto out_proc;
out:
	return error;

out_proc:
	nl_fib_lookup_exit(net);
out_nlfl:
	ip_fib_net_exit(net);
	goto out;
}

static void __net_exit fib_net_exit(struct net *net)
{
	fib_proc_exit(net);
	nl_fib_lookup_exit(net);
	ip_fib_net_exit(net);
}

static struct pernet_operations fib_net_ops = {
	.init = fib_net_init,
	.exit = fib_net_exit,
};

void __init ip_fib_init(void)
{
	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);

	register_pernet_subsys(&fib_net_ops);
	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);

	fib_hash_init();
}