1 /* 2 * NET3 IP device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 13 * 14 * Additional Authors: 15 * Alan Cox, <gw4pts@gw4pts.ampr.org> 16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 17 * 18 * Changes: 19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr 20 * lists. 21 * Cyrus Durgin: updated for kmod 22 * Matthias Andree: in devinet_ioctl, compare label and 23 * address (4.4BSD alias style support), 24 * fall back to comparing just the label 25 * if no match found. 26 */ 27 28 29 #include <asm/uaccess.h> 30 #include <linux/bitops.h> 31 #include <linux/capability.h> 32 #include <linux/module.h> 33 #include <linux/types.h> 34 #include <linux/kernel.h> 35 #include <linux/string.h> 36 #include <linux/mm.h> 37 #include <linux/socket.h> 38 #include <linux/sockios.h> 39 #include <linux/in.h> 40 #include <linux/errno.h> 41 #include <linux/interrupt.h> 42 #include <linux/if_addr.h> 43 #include <linux/if_ether.h> 44 #include <linux/inet.h> 45 #include <linux/netdevice.h> 46 #include <linux/etherdevice.h> 47 #include <linux/skbuff.h> 48 #include <linux/init.h> 49 #include <linux/notifier.h> 50 #include <linux/inetdevice.h> 51 #include <linux/igmp.h> 52 #include <linux/slab.h> 53 #include <linux/hash.h> 54 #ifdef CONFIG_SYSCTL 55 #include <linux/sysctl.h> 56 #endif 57 #include <linux/kmod.h> 58 #include <linux/netconf.h> 59 60 #include <net/arp.h> 61 #include <net/ip.h> 62 #include <net/route.h> 63 #include <net/ip_fib.h> 64 #include <net/rtnetlink.h> 65 #include <net/net_namespace.h> 66 #include <net/addrconf.h> 67 68 #include "fib_lookup.h" 69 70 static struct 
ipv4_devconf ipv4_devconf = { 71 .data = { 72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, 77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, 78 }, 79 }; 80 81 static struct ipv4_devconf ipv4_devconf_dflt = { 82 .data = { 83 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 84 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 85 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 86 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 87 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, 88 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, 89 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, 90 }, 91 }; 92 93 #define IPV4_DEVCONF_DFLT(net, attr) \ 94 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr) 95 96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { 97 [IFA_LOCAL] = { .type = NLA_U32 }, 98 [IFA_ADDRESS] = { .type = NLA_U32 }, 99 [IFA_BROADCAST] = { .type = NLA_U32 }, 100 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, 101 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, 102 [IFA_FLAGS] = { .type = NLA_U32 }, 103 }; 104 105 #define IN4_ADDR_HSIZE_SHIFT 8 106 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) 107 108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; 109 110 static u32 inet_addr_hash(const struct net *net, __be32 addr) 111 { 112 u32 val = (__force u32) addr ^ net_hash_mix(net); 113 114 return hash_32(val, IN4_ADDR_HSIZE_SHIFT); 115 } 116 117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) 118 { 119 u32 hash = inet_addr_hash(net, ifa->ifa_local); 120 121 ASSERT_RTNL(); 122 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); 123 } 124 125 static void inet_hash_remove(struct in_ifaddr *ifa) 126 { 127 ASSERT_RTNL(); 128 hlist_del_init_rcu(&ifa->hash); 129 } 130 131 /** 132 * __ip_dev_find - find 
the first device with a given source address. 133 * @net: the net namespace 134 * @addr: the source address 135 * @devref: if true, take a reference on the found device 136 * 137 * If a caller uses devref=false, it should be protected by RCU, or RTNL 138 */ 139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) 140 { 141 u32 hash = inet_addr_hash(net, addr); 142 struct net_device *result = NULL; 143 struct in_ifaddr *ifa; 144 145 rcu_read_lock(); 146 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { 147 if (ifa->ifa_local == addr) { 148 struct net_device *dev = ifa->ifa_dev->dev; 149 150 if (!net_eq(dev_net(dev), net)) 151 continue; 152 result = dev; 153 break; 154 } 155 } 156 if (!result) { 157 struct flowi4 fl4 = { .daddr = addr }; 158 struct fib_result res = { 0 }; 159 struct fib_table *local; 160 161 /* Fallback to FIB local table so that communication 162 * over loopback subnets work. 163 */ 164 local = fib_get_table(net, RT_TABLE_LOCAL); 165 if (local && 166 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && 167 res.type == RTN_LOCAL) 168 result = FIB_RES_DEV(res); 169 } 170 if (result && devref) 171 dev_hold(result); 172 rcu_read_unlock(); 173 return result; 174 } 175 EXPORT_SYMBOL(__ip_dev_find); 176 177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); 178 179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 181 int destroy); 182 #ifdef CONFIG_SYSCTL 183 static int devinet_sysctl_register(struct in_device *idev); 184 static void devinet_sysctl_unregister(struct in_device *idev); 185 #else 186 static int devinet_sysctl_register(struct in_device *idev) 187 { 188 return 0; 189 } 190 static void devinet_sysctl_unregister(struct in_device *idev) 191 { 192 } 193 #endif 194 195 /* Locks all the inet devices. 
*/ 196 197 static struct in_ifaddr *inet_alloc_ifa(void) 198 { 199 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); 200 } 201 202 static void inet_rcu_free_ifa(struct rcu_head *head) 203 { 204 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); 205 if (ifa->ifa_dev) 206 in_dev_put(ifa->ifa_dev); 207 kfree(ifa); 208 } 209 210 static void inet_free_ifa(struct in_ifaddr *ifa) 211 { 212 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); 213 } 214 215 void in_dev_finish_destroy(struct in_device *idev) 216 { 217 struct net_device *dev = idev->dev; 218 219 WARN_ON(idev->ifa_list); 220 WARN_ON(idev->mc_list); 221 kfree(rcu_dereference_protected(idev->mc_hash, 1)); 222 #ifdef NET_REFCNT_DEBUG 223 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); 224 #endif 225 dev_put(dev); 226 if (!idev->dead) 227 pr_err("Freeing alive in_device %p\n", idev); 228 else 229 kfree(idev); 230 } 231 EXPORT_SYMBOL(in_dev_finish_destroy); 232 233 static struct in_device *inetdev_init(struct net_device *dev) 234 { 235 struct in_device *in_dev; 236 int err = -ENOMEM; 237 238 ASSERT_RTNL(); 239 240 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); 241 if (!in_dev) 242 goto out; 243 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, 244 sizeof(in_dev->cnf)); 245 in_dev->cnf.sysctl = NULL; 246 in_dev->dev = dev; 247 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl); 248 if (!in_dev->arp_parms) 249 goto out_kfree; 250 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) 251 dev_disable_lro(dev); 252 /* Reference in_dev->dev */ 253 dev_hold(dev); 254 /* Account for reference dev->ip_ptr (below) */ 255 in_dev_hold(in_dev); 256 257 err = devinet_sysctl_register(in_dev); 258 if (err) { 259 in_dev->dead = 1; 260 in_dev_put(in_dev); 261 in_dev = NULL; 262 goto out; 263 } 264 ip_mc_init_dev(in_dev); 265 if (dev->flags & IFF_UP) 266 ip_mc_up(in_dev); 267 268 /* we can receive as soon as ip_ptr is set -- do this last */ 269 rcu_assign_pointer(dev->ip_ptr, in_dev); 270 out: 271 return 
in_dev ?: ERR_PTR(err); 272 out_kfree: 273 kfree(in_dev); 274 in_dev = NULL; 275 goto out; 276 } 277 278 static void in_dev_rcu_put(struct rcu_head *head) 279 { 280 struct in_device *idev = container_of(head, struct in_device, rcu_head); 281 in_dev_put(idev); 282 } 283 284 static void inetdev_destroy(struct in_device *in_dev) 285 { 286 struct in_ifaddr *ifa; 287 struct net_device *dev; 288 289 ASSERT_RTNL(); 290 291 dev = in_dev->dev; 292 293 in_dev->dead = 1; 294 295 ip_mc_destroy_dev(in_dev); 296 297 while ((ifa = in_dev->ifa_list) != NULL) { 298 inet_del_ifa(in_dev, &in_dev->ifa_list, 0); 299 inet_free_ifa(ifa); 300 } 301 302 RCU_INIT_POINTER(dev->ip_ptr, NULL); 303 304 devinet_sysctl_unregister(in_dev); 305 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 306 arp_ifdown(dev); 307 308 call_rcu(&in_dev->rcu_head, in_dev_rcu_put); 309 } 310 311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) 312 { 313 rcu_read_lock(); 314 for_primary_ifa(in_dev) { 315 if (inet_ifa_match(a, ifa)) { 316 if (!b || inet_ifa_match(b, ifa)) { 317 rcu_read_unlock(); 318 return 1; 319 } 320 } 321 } endfor_ifa(in_dev); 322 rcu_read_unlock(); 323 return 0; 324 } 325 326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 327 int destroy, struct nlmsghdr *nlh, u32 portid) 328 { 329 struct in_ifaddr *promote = NULL; 330 struct in_ifaddr *ifa, *ifa1 = *ifap; 331 struct in_ifaddr *last_prim = in_dev->ifa_list; 332 struct in_ifaddr *prev_prom = NULL; 333 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); 334 335 ASSERT_RTNL(); 336 337 if (in_dev->dead) 338 goto no_promotions; 339 340 /* 1. 
Deleting primary ifaddr forces deletion all secondaries 341 * unless alias promotion is set 342 **/ 343 344 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { 345 struct in_ifaddr **ifap1 = &ifa1->ifa_next; 346 347 while ((ifa = *ifap1) != NULL) { 348 if (!(ifa->ifa_flags & IFA_F_SECONDARY) && 349 ifa1->ifa_scope <= ifa->ifa_scope) 350 last_prim = ifa; 351 352 if (!(ifa->ifa_flags & IFA_F_SECONDARY) || 353 ifa1->ifa_mask != ifa->ifa_mask || 354 !inet_ifa_match(ifa1->ifa_address, ifa)) { 355 ifap1 = &ifa->ifa_next; 356 prev_prom = ifa; 357 continue; 358 } 359 360 if (!do_promote) { 361 inet_hash_remove(ifa); 362 *ifap1 = ifa->ifa_next; 363 364 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); 365 blocking_notifier_call_chain(&inetaddr_chain, 366 NETDEV_DOWN, ifa); 367 inet_free_ifa(ifa); 368 } else { 369 promote = ifa; 370 break; 371 } 372 } 373 } 374 375 /* On promotion all secondaries from subnet are changing 376 * the primary IP, we must remove all their routes silently 377 * and later to add them back with new prefsrc. Do this 378 * while all addresses are on the device list. 379 */ 380 for (ifa = promote; ifa; ifa = ifa->ifa_next) { 381 if (ifa1->ifa_mask == ifa->ifa_mask && 382 inet_ifa_match(ifa1->ifa_address, ifa)) 383 fib_del_ifaddr(ifa, ifa1); 384 } 385 386 no_promotions: 387 /* 2. Unlink it */ 388 389 *ifap = ifa1->ifa_next; 390 inet_hash_remove(ifa1); 391 392 /* 3. Announce address deletion */ 393 394 /* Send message first, then call notifier. 395 At first sight, FIB update triggered by notifier 396 will refer to already deleted ifaddr, that could confuse 397 netlink listeners. It is not true: look, gated sees 398 that route deleted and if it still thinks that ifaddr 399 is valid, it will try to restore deleted routes... Grr. 400 So that, this order is correct. 
401 */ 402 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); 403 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); 404 405 if (promote) { 406 struct in_ifaddr *next_sec = promote->ifa_next; 407 408 if (prev_prom) { 409 prev_prom->ifa_next = promote->ifa_next; 410 promote->ifa_next = last_prim->ifa_next; 411 last_prim->ifa_next = promote; 412 } 413 414 promote->ifa_flags &= ~IFA_F_SECONDARY; 415 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); 416 blocking_notifier_call_chain(&inetaddr_chain, 417 NETDEV_UP, promote); 418 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { 419 if (ifa1->ifa_mask != ifa->ifa_mask || 420 !inet_ifa_match(ifa1->ifa_address, ifa)) 421 continue; 422 fib_add_ifaddr(ifa); 423 } 424 425 } 426 if (destroy) 427 inet_free_ifa(ifa1); 428 } 429 430 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 431 int destroy) 432 { 433 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); 434 } 435 436 static void check_lifetime(struct work_struct *work); 437 438 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); 439 440 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, 441 u32 portid) 442 { 443 struct in_device *in_dev = ifa->ifa_dev; 444 struct in_ifaddr *ifa1, **ifap, **last_primary; 445 446 ASSERT_RTNL(); 447 448 if (!ifa->ifa_local) { 449 inet_free_ifa(ifa); 450 return 0; 451 } 452 453 ifa->ifa_flags &= ~IFA_F_SECONDARY; 454 last_primary = &in_dev->ifa_list; 455 456 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; 457 ifap = &ifa1->ifa_next) { 458 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && 459 ifa->ifa_scope <= ifa1->ifa_scope) 460 last_primary = &ifa1->ifa_next; 461 if (ifa1->ifa_mask == ifa->ifa_mask && 462 inet_ifa_match(ifa1->ifa_address, ifa)) { 463 if (ifa1->ifa_local == ifa->ifa_local) { 464 inet_free_ifa(ifa); 465 return -EEXIST; 466 } 467 if (ifa1->ifa_scope != ifa->ifa_scope) { 468 inet_free_ifa(ifa); 469 return -EINVAL; 470 } 471 ifa->ifa_flags |= IFA_F_SECONDARY; 472 } 473 } 
474 475 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) { 476 prandom_seed((__force u32) ifa->ifa_local); 477 ifap = last_primary; 478 } 479 480 ifa->ifa_next = *ifap; 481 *ifap = ifa; 482 483 inet_hash_insert(dev_net(in_dev->dev), ifa); 484 485 cancel_delayed_work(&check_lifetime_work); 486 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); 487 488 /* Send message first, then call notifier. 489 Notifier will trigger FIB update, so that 490 listeners of netlink will know about new ifaddr */ 491 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); 492 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 493 494 return 0; 495 } 496 497 static int inet_insert_ifa(struct in_ifaddr *ifa) 498 { 499 return __inet_insert_ifa(ifa, NULL, 0); 500 } 501 502 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 503 { 504 struct in_device *in_dev = __in_dev_get_rtnl(dev); 505 506 ASSERT_RTNL(); 507 508 if (!in_dev) { 509 inet_free_ifa(ifa); 510 return -ENOBUFS; 511 } 512 ipv4_devconf_setall(in_dev); 513 neigh_parms_data_state_setall(in_dev->arp_parms); 514 if (ifa->ifa_dev != in_dev) { 515 WARN_ON(ifa->ifa_dev); 516 in_dev_hold(in_dev); 517 ifa->ifa_dev = in_dev; 518 } 519 if (ipv4_is_loopback(ifa->ifa_local)) 520 ifa->ifa_scope = RT_SCOPE_HOST; 521 return inet_insert_ifa(ifa); 522 } 523 524 /* Caller must hold RCU or RTNL : 525 * We dont take a reference on found in_device 526 */ 527 struct in_device *inetdev_by_index(struct net *net, int ifindex) 528 { 529 struct net_device *dev; 530 struct in_device *in_dev = NULL; 531 532 rcu_read_lock(); 533 dev = dev_get_by_index_rcu(net, ifindex); 534 if (dev) 535 in_dev = rcu_dereference_rtnl(dev->ip_ptr); 536 rcu_read_unlock(); 537 return in_dev; 538 } 539 EXPORT_SYMBOL(inetdev_by_index); 540 541 /* Called only from RTNL semaphored context. No locks. 
*/ 542 543 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, 544 __be32 mask) 545 { 546 ASSERT_RTNL(); 547 548 for_primary_ifa(in_dev) { 549 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa)) 550 return ifa; 551 } endfor_ifa(in_dev); 552 return NULL; 553 } 554 555 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) 556 { 557 struct ip_mreqn mreq = { 558 .imr_multiaddr.s_addr = ifa->ifa_address, 559 .imr_ifindex = ifa->ifa_dev->dev->ifindex, 560 }; 561 int ret; 562 563 ASSERT_RTNL(); 564 565 lock_sock(sk); 566 if (join) 567 ret = ip_mc_join_group(sk, &mreq); 568 else 569 ret = ip_mc_leave_group(sk, &mreq); 570 release_sock(sk); 571 572 return ret; 573 } 574 575 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) 576 { 577 struct net *net = sock_net(skb->sk); 578 struct nlattr *tb[IFA_MAX+1]; 579 struct in_device *in_dev; 580 struct ifaddrmsg *ifm; 581 struct in_ifaddr *ifa, **ifap; 582 int err = -EINVAL; 583 584 ASSERT_RTNL(); 585 586 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 587 if (err < 0) 588 goto errout; 589 590 ifm = nlmsg_data(nlh); 591 in_dev = inetdev_by_index(net, ifm->ifa_index); 592 if (!in_dev) { 593 err = -ENODEV; 594 goto errout; 595 } 596 597 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 598 ifap = &ifa->ifa_next) { 599 if (tb[IFA_LOCAL] && 600 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL])) 601 continue; 602 603 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) 604 continue; 605 606 if (tb[IFA_ADDRESS] && 607 (ifm->ifa_prefixlen != ifa->ifa_prefixlen || 608 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa))) 609 continue; 610 611 if (ipv4_is_multicast(ifa->ifa_address)) 612 ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa); 613 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); 614 return 0; 615 } 616 617 err = -EADDRNOTAVAIL; 618 errout: 619 return err; 620 } 621 622 #define INFINITY_LIFE_TIME 
0xFFFFFFFF 623 624 static void check_lifetime(struct work_struct *work) 625 { 626 unsigned long now, next, next_sec, next_sched; 627 struct in_ifaddr *ifa; 628 struct hlist_node *n; 629 int i; 630 631 now = jiffies; 632 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); 633 634 for (i = 0; i < IN4_ADDR_HSIZE; i++) { 635 bool change_needed = false; 636 637 rcu_read_lock(); 638 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { 639 unsigned long age; 640 641 if (ifa->ifa_flags & IFA_F_PERMANENT) 642 continue; 643 644 /* We try to batch several events at once. */ 645 age = (now - ifa->ifa_tstamp + 646 ADDRCONF_TIMER_FUZZ_MINUS) / HZ; 647 648 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && 649 age >= ifa->ifa_valid_lft) { 650 change_needed = true; 651 } else if (ifa->ifa_preferred_lft == 652 INFINITY_LIFE_TIME) { 653 continue; 654 } else if (age >= ifa->ifa_preferred_lft) { 655 if (time_before(ifa->ifa_tstamp + 656 ifa->ifa_valid_lft * HZ, next)) 657 next = ifa->ifa_tstamp + 658 ifa->ifa_valid_lft * HZ; 659 660 if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) 661 change_needed = true; 662 } else if (time_before(ifa->ifa_tstamp + 663 ifa->ifa_preferred_lft * HZ, 664 next)) { 665 next = ifa->ifa_tstamp + 666 ifa->ifa_preferred_lft * HZ; 667 } 668 } 669 rcu_read_unlock(); 670 if (!change_needed) 671 continue; 672 rtnl_lock(); 673 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { 674 unsigned long age; 675 676 if (ifa->ifa_flags & IFA_F_PERMANENT) 677 continue; 678 679 /* We try to batch several events at once. 
*/ 680 age = (now - ifa->ifa_tstamp + 681 ADDRCONF_TIMER_FUZZ_MINUS) / HZ; 682 683 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && 684 age >= ifa->ifa_valid_lft) { 685 struct in_ifaddr **ifap; 686 687 for (ifap = &ifa->ifa_dev->ifa_list; 688 *ifap != NULL; ifap = &(*ifap)->ifa_next) { 689 if (*ifap == ifa) { 690 inet_del_ifa(ifa->ifa_dev, 691 ifap, 1); 692 break; 693 } 694 } 695 } else if (ifa->ifa_preferred_lft != 696 INFINITY_LIFE_TIME && 697 age >= ifa->ifa_preferred_lft && 698 !(ifa->ifa_flags & IFA_F_DEPRECATED)) { 699 ifa->ifa_flags |= IFA_F_DEPRECATED; 700 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 701 } 702 } 703 rtnl_unlock(); 704 } 705 706 next_sec = round_jiffies_up(next); 707 next_sched = next; 708 709 /* If rounded timeout is accurate enough, accept it. */ 710 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) 711 next_sched = next_sec; 712 713 now = jiffies; 714 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ 715 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) 716 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; 717 718 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 719 next_sched - now); 720 } 721 722 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, 723 __u32 prefered_lft) 724 { 725 unsigned long timeout; 726 727 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); 728 729 timeout = addrconf_timeout_fixup(valid_lft, HZ); 730 if (addrconf_finite_timeout(timeout)) 731 ifa->ifa_valid_lft = timeout; 732 else 733 ifa->ifa_flags |= IFA_F_PERMANENT; 734 735 timeout = addrconf_timeout_fixup(prefered_lft, HZ); 736 if (addrconf_finite_timeout(timeout)) { 737 if (timeout == 0) 738 ifa->ifa_flags |= IFA_F_DEPRECATED; 739 ifa->ifa_preferred_lft = timeout; 740 } 741 ifa->ifa_tstamp = jiffies; 742 if (!ifa->ifa_cstamp) 743 ifa->ifa_cstamp = ifa->ifa_tstamp; 744 } 745 746 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, 747 __u32 *pvalid_lft, __u32 *pprefered_lft) 748 { 749 struct 
nlattr *tb[IFA_MAX+1]; 750 struct in_ifaddr *ifa; 751 struct ifaddrmsg *ifm; 752 struct net_device *dev; 753 struct in_device *in_dev; 754 int err; 755 756 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 757 if (err < 0) 758 goto errout; 759 760 ifm = nlmsg_data(nlh); 761 err = -EINVAL; 762 if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL]) 763 goto errout; 764 765 dev = __dev_get_by_index(net, ifm->ifa_index); 766 err = -ENODEV; 767 if (!dev) 768 goto errout; 769 770 in_dev = __in_dev_get_rtnl(dev); 771 err = -ENOBUFS; 772 if (!in_dev) 773 goto errout; 774 775 ifa = inet_alloc_ifa(); 776 if (!ifa) 777 /* 778 * A potential indev allocation can be left alive, it stays 779 * assigned to its device and is destroy with it. 780 */ 781 goto errout; 782 783 ipv4_devconf_setall(in_dev); 784 neigh_parms_data_state_setall(in_dev->arp_parms); 785 in_dev_hold(in_dev); 786 787 if (!tb[IFA_ADDRESS]) 788 tb[IFA_ADDRESS] = tb[IFA_LOCAL]; 789 790 INIT_HLIST_NODE(&ifa->hash); 791 ifa->ifa_prefixlen = ifm->ifa_prefixlen; 792 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); 793 ifa->ifa_flags = tb[IFA_FLAGS] ? 
nla_get_u32(tb[IFA_FLAGS]) : 794 ifm->ifa_flags; 795 ifa->ifa_scope = ifm->ifa_scope; 796 ifa->ifa_dev = in_dev; 797 798 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]); 799 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]); 800 801 if (tb[IFA_BROADCAST]) 802 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]); 803 804 if (tb[IFA_LABEL]) 805 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); 806 else 807 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 808 809 if (tb[IFA_CACHEINFO]) { 810 struct ifa_cacheinfo *ci; 811 812 ci = nla_data(tb[IFA_CACHEINFO]); 813 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { 814 err = -EINVAL; 815 goto errout_free; 816 } 817 *pvalid_lft = ci->ifa_valid; 818 *pprefered_lft = ci->ifa_prefered; 819 } 820 821 return ifa; 822 823 errout_free: 824 inet_free_ifa(ifa); 825 errout: 826 return ERR_PTR(err); 827 } 828 829 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) 830 { 831 struct in_device *in_dev = ifa->ifa_dev; 832 struct in_ifaddr *ifa1, **ifap; 833 834 if (!ifa->ifa_local) 835 return NULL; 836 837 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; 838 ifap = &ifa1->ifa_next) { 839 if (ifa1->ifa_mask == ifa->ifa_mask && 840 inet_ifa_match(ifa1->ifa_address, ifa) && 841 ifa1->ifa_local == ifa->ifa_local) 842 return ifa1; 843 } 844 return NULL; 845 } 846 847 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) 848 { 849 struct net *net = sock_net(skb->sk); 850 struct in_ifaddr *ifa; 851 struct in_ifaddr *ifa_existing; 852 __u32 valid_lft = INFINITY_LIFE_TIME; 853 __u32 prefered_lft = INFINITY_LIFE_TIME; 854 855 ASSERT_RTNL(); 856 857 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft); 858 if (IS_ERR(ifa)) 859 return PTR_ERR(ifa); 860 861 ifa_existing = find_matching_ifa(ifa); 862 if (!ifa_existing) { 863 /* It would be best to check for !NLM_F_CREATE here but 864 * userspace already relies on not having to provide this. 
865 */ 866 set_ifa_lifetime(ifa, valid_lft, prefered_lft); 867 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { 868 int ret = ip_mc_config(net->ipv4.mc_autojoin_sk, 869 true, ifa); 870 871 if (ret < 0) { 872 inet_free_ifa(ifa); 873 return ret; 874 } 875 } 876 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); 877 } else { 878 inet_free_ifa(ifa); 879 880 if (nlh->nlmsg_flags & NLM_F_EXCL || 881 !(nlh->nlmsg_flags & NLM_F_REPLACE)) 882 return -EEXIST; 883 ifa = ifa_existing; 884 set_ifa_lifetime(ifa, valid_lft, prefered_lft); 885 cancel_delayed_work(&check_lifetime_work); 886 queue_delayed_work(system_power_efficient_wq, 887 &check_lifetime_work, 0); 888 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); 889 } 890 return 0; 891 } 892 893 /* 894 * Determine a default network mask, based on the IP address. 895 */ 896 897 static int inet_abc_len(__be32 addr) 898 { 899 int rc = -1; /* Something else, probably a multicast. */ 900 901 if (ipv4_is_zeronet(addr)) 902 rc = 0; 903 else { 904 __u32 haddr = ntohl(addr); 905 906 if (IN_CLASSA(haddr)) 907 rc = 8; 908 else if (IN_CLASSB(haddr)) 909 rc = 16; 910 else if (IN_CLASSC(haddr)) 911 rc = 24; 912 } 913 914 return rc; 915 } 916 917 918 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) 919 { 920 struct ifreq ifr; 921 struct sockaddr_in sin_orig; 922 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; 923 struct in_device *in_dev; 924 struct in_ifaddr **ifap = NULL; 925 struct in_ifaddr *ifa = NULL; 926 struct net_device *dev; 927 char *colon; 928 int ret = -EFAULT; 929 int tryaddrmatch = 0; 930 931 /* 932 * Fetch the caller's info block into kernel space 933 */ 934 935 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 936 goto out; 937 ifr.ifr_name[IFNAMSIZ - 1] = 0; 938 939 /* save original address for comparison */ 940 memcpy(&sin_orig, sin, sizeof(*sin)); 941 942 colon = strchr(ifr.ifr_name, ':'); 943 if (colon) 944 *colon = 0; 945 946 dev_load(net, ifr.ifr_name); 947 948 switch 
(cmd) { 949 case SIOCGIFADDR: /* Get interface address */ 950 case SIOCGIFBRDADDR: /* Get the broadcast address */ 951 case SIOCGIFDSTADDR: /* Get the destination address */ 952 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 953 /* Note that these ioctls will not sleep, 954 so that we do not impose a lock. 955 One day we will be forced to put shlock here (I mean SMP) 956 */ 957 tryaddrmatch = (sin_orig.sin_family == AF_INET); 958 memset(sin, 0, sizeof(*sin)); 959 sin->sin_family = AF_INET; 960 break; 961 962 case SIOCSIFFLAGS: 963 ret = -EPERM; 964 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 965 goto out; 966 break; 967 case SIOCSIFADDR: /* Set interface address (and family) */ 968 case SIOCSIFBRDADDR: /* Set the broadcast address */ 969 case SIOCSIFDSTADDR: /* Set the destination address */ 970 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 971 ret = -EPERM; 972 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 973 goto out; 974 ret = -EINVAL; 975 if (sin->sin_family != AF_INET) 976 goto out; 977 break; 978 default: 979 ret = -EINVAL; 980 goto out; 981 } 982 983 rtnl_lock(); 984 985 ret = -ENODEV; 986 dev = __dev_get_by_name(net, ifr.ifr_name); 987 if (!dev) 988 goto done; 989 990 if (colon) 991 *colon = ':'; 992 993 in_dev = __in_dev_get_rtnl(dev); 994 if (in_dev) { 995 if (tryaddrmatch) { 996 /* Matthias Andree */ 997 /* compare label and address (4.4BSD style) */ 998 /* note: we only do this for a limited set of ioctls 999 and only if the original address family was AF_INET. 1000 This is checked above. 
*/ 1001 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 1002 ifap = &ifa->ifa_next) { 1003 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 1004 sin_orig.sin_addr.s_addr == 1005 ifa->ifa_local) { 1006 break; /* found */ 1007 } 1008 } 1009 } 1010 /* we didn't get a match, maybe the application is 1011 4.3BSD-style and passed in junk so we fall back to 1012 comparing just the label */ 1013 if (!ifa) { 1014 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 1015 ifap = &ifa->ifa_next) 1016 if (!strcmp(ifr.ifr_name, ifa->ifa_label)) 1017 break; 1018 } 1019 } 1020 1021 ret = -EADDRNOTAVAIL; 1022 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) 1023 goto done; 1024 1025 switch (cmd) { 1026 case SIOCGIFADDR: /* Get interface address */ 1027 sin->sin_addr.s_addr = ifa->ifa_local; 1028 goto rarok; 1029 1030 case SIOCGIFBRDADDR: /* Get the broadcast address */ 1031 sin->sin_addr.s_addr = ifa->ifa_broadcast; 1032 goto rarok; 1033 1034 case SIOCGIFDSTADDR: /* Get the destination address */ 1035 sin->sin_addr.s_addr = ifa->ifa_address; 1036 goto rarok; 1037 1038 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 1039 sin->sin_addr.s_addr = ifa->ifa_mask; 1040 goto rarok; 1041 1042 case SIOCSIFFLAGS: 1043 if (colon) { 1044 ret = -EADDRNOTAVAIL; 1045 if (!ifa) 1046 break; 1047 ret = 0; 1048 if (!(ifr.ifr_flags & IFF_UP)) 1049 inet_del_ifa(in_dev, ifap, 1); 1050 break; 1051 } 1052 ret = dev_change_flags(dev, ifr.ifr_flags); 1053 break; 1054 1055 case SIOCSIFADDR: /* Set interface address (and family) */ 1056 ret = -EINVAL; 1057 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 1058 break; 1059 1060 if (!ifa) { 1061 ret = -ENOBUFS; 1062 ifa = inet_alloc_ifa(); 1063 if (!ifa) 1064 break; 1065 INIT_HLIST_NODE(&ifa->hash); 1066 if (colon) 1067 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); 1068 else 1069 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1070 } else { 1071 ret = 0; 1072 if (ifa->ifa_local == sin->sin_addr.s_addr) 1073 break; 1074 inet_del_ifa(in_dev, ifap, 
0); 1075 ifa->ifa_broadcast = 0; 1076 ifa->ifa_scope = 0; 1077 } 1078 1079 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; 1080 1081 if (!(dev->flags & IFF_POINTOPOINT)) { 1082 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); 1083 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); 1084 if ((dev->flags & IFF_BROADCAST) && 1085 ifa->ifa_prefixlen < 31) 1086 ifa->ifa_broadcast = ifa->ifa_address | 1087 ~ifa->ifa_mask; 1088 } else { 1089 ifa->ifa_prefixlen = 32; 1090 ifa->ifa_mask = inet_make_mask(32); 1091 } 1092 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 1093 ret = inet_set_ifa(dev, ifa); 1094 break; 1095 1096 case SIOCSIFBRDADDR: /* Set the broadcast address */ 1097 ret = 0; 1098 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { 1099 inet_del_ifa(in_dev, ifap, 0); 1100 ifa->ifa_broadcast = sin->sin_addr.s_addr; 1101 inet_insert_ifa(ifa); 1102 } 1103 break; 1104 1105 case SIOCSIFDSTADDR: /* Set the destination address */ 1106 ret = 0; 1107 if (ifa->ifa_address == sin->sin_addr.s_addr) 1108 break; 1109 ret = -EINVAL; 1110 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 1111 break; 1112 ret = 0; 1113 inet_del_ifa(in_dev, ifap, 0); 1114 ifa->ifa_address = sin->sin_addr.s_addr; 1115 inet_insert_ifa(ifa); 1116 break; 1117 1118 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 1119 1120 /* 1121 * The mask we set must be legal. 1122 */ 1123 ret = -EINVAL; 1124 if (bad_mask(sin->sin_addr.s_addr, 0)) 1125 break; 1126 ret = 0; 1127 if (ifa->ifa_mask != sin->sin_addr.s_addr) { 1128 __be32 old_mask = ifa->ifa_mask; 1129 inet_del_ifa(in_dev, ifap, 0); 1130 ifa->ifa_mask = sin->sin_addr.s_addr; 1131 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); 1132 1133 /* See if current broadcast address matches 1134 * with current netmask, then recalculate 1135 * the broadcast address. Otherwise it's a 1136 * funny address, so don't touch it since 1137 * the user seems to know what (s)he's doing... 
1138 */ 1139 if ((dev->flags & IFF_BROADCAST) && 1140 (ifa->ifa_prefixlen < 31) && 1141 (ifa->ifa_broadcast == 1142 (ifa->ifa_local|~old_mask))) { 1143 ifa->ifa_broadcast = (ifa->ifa_local | 1144 ~sin->sin_addr.s_addr); 1145 } 1146 inet_insert_ifa(ifa); 1147 } 1148 break; 1149 } 1150 done: 1151 rtnl_unlock(); 1152 out: 1153 return ret; 1154 rarok: 1155 rtnl_unlock(); 1156 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0; 1157 goto out; 1158 } 1159 1160 static int inet_gifconf(struct net_device *dev, char __user *buf, int len) 1161 { 1162 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1163 struct in_ifaddr *ifa; 1164 struct ifreq ifr; 1165 int done = 0; 1166 1167 if (!in_dev) 1168 goto out; 1169 1170 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1171 if (!buf) { 1172 done += sizeof(ifr); 1173 continue; 1174 } 1175 if (len < (int) sizeof(ifr)) 1176 break; 1177 memset(&ifr, 0, sizeof(struct ifreq)); 1178 strcpy(ifr.ifr_name, ifa->ifa_label); 1179 1180 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; 1181 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = 1182 ifa->ifa_local; 1183 1184 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { 1185 done = -EFAULT; 1186 break; 1187 } 1188 buf += sizeof(struct ifreq); 1189 len -= sizeof(struct ifreq); 1190 done += sizeof(struct ifreq); 1191 } 1192 out: 1193 return done; 1194 } 1195 1196 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) 1197 { 1198 __be32 addr = 0; 1199 struct in_device *in_dev; 1200 struct net *net = dev_net(dev); 1201 int master_idx; 1202 1203 rcu_read_lock(); 1204 in_dev = __in_dev_get_rcu(dev); 1205 if (!in_dev) 1206 goto no_in_dev; 1207 1208 for_primary_ifa(in_dev) { 1209 if (ifa->ifa_scope > scope) 1210 continue; 1211 if (!dst || inet_ifa_match(dst, ifa)) { 1212 addr = ifa->ifa_local; 1213 break; 1214 } 1215 if (!addr) 1216 addr = ifa->ifa_local; 1217 } endfor_ifa(in_dev); 1218 1219 if (addr) 1220 goto out_unlock; 1221 no_in_dev: 
1222 master_idx = l3mdev_master_ifindex_rcu(dev); 1223 1224 /* For VRFs, the VRF device takes the place of the loopback device, 1225 * with addresses on it being preferred. Note in such cases the 1226 * loopback device will be among the devices that fail the master_idx 1227 * equality check in the loop below. 1228 */ 1229 if (master_idx && 1230 (dev = dev_get_by_index_rcu(net, master_idx)) && 1231 (in_dev = __in_dev_get_rcu(dev))) { 1232 for_primary_ifa(in_dev) { 1233 if (ifa->ifa_scope != RT_SCOPE_LINK && 1234 ifa->ifa_scope <= scope) { 1235 addr = ifa->ifa_local; 1236 goto out_unlock; 1237 } 1238 } endfor_ifa(in_dev); 1239 } 1240 1241 /* Not loopback addresses on loopback should be preferred 1242 in this case. It is important that lo is the first interface 1243 in dev_base list. 1244 */ 1245 for_each_netdev_rcu(net, dev) { /* last resort: any device with the same L3 master index */ 1246 if (l3mdev_master_ifindex_rcu(dev) != master_idx) 1247 continue; 1248 1249 in_dev = __in_dev_get_rcu(dev); 1250 if (!in_dev) 1251 continue; 1252 1253 for_primary_ifa(in_dev) { 1254 if (ifa->ifa_scope != RT_SCOPE_LINK && 1255 ifa->ifa_scope <= scope) { 1256 addr = ifa->ifa_local; 1257 goto out_unlock; 1258 } 1259 } endfor_ifa(in_dev); 1260 } 1261 out_unlock: 1262 rcu_read_unlock(); 1263 return addr; 1264 } 1265 EXPORT_SYMBOL(inet_select_addr); 1266 /* confirm_addr_indev - per-device worker for inet_confirm_addr(). Scans the addresses of @in_dev keeping two pieces of state: "addr", the first address with ifa_scope <= @scope that equals @local (any address when @local is 0), and "same", set once an address satisfies the subnet constraints given by @local and @dst. The function returns addr only when such a subnet match was found, otherwise 0. */ 1267 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, 1268 __be32 local, int scope) 1269 { 1270 int same = 0; 1271 __be32 addr = 0; 1272 1273 for_ifa(in_dev) { 1274 if (!addr && 1275 (local == ifa->ifa_local || !local) && 1276 ifa->ifa_scope <= scope) { 1277 addr = ifa->ifa_local; 1278 if (same) 1279 break; 1280 } 1281 if (!same) { 1282 same = (!local || inet_ifa_match(local, ifa)) && 1283 (!dst || inet_ifa_match(dst, ifa)); 1284 if (same && addr) { 1285 if (local || !dst) 1286 break; 1287 /* Is the selected addr into dst subnet? */ 1288 if (inet_ifa_match(addr, ifa)) 1289 break; 1290 /* No, then can we use new local src? 
*/ 1291 if (ifa->ifa_scope <= scope) { 1292 addr = ifa->ifa_local; 1293 break; 1294 } 1295 /* search for large dst subnet for addr */ 1296 same = 0; 1297 } 1298 } 1299 } endfor_ifa(in_dev); 1300 /* a candidate address only counts if a subnet match was found as well */ 1301 return same ? addr : 0; 1302 } 1303 1304 /* 1305 * Confirm that local IP address exists using wildcards: 1306 * - net: netns to check, cannot be NULL 1307 * - in_dev: only on this interface, NULL=any interface 1308 * - dst: only in the same subnet as dst, 0=any dst 1309 * - local: address, 0=autoselect the local address 1310 * - scope: maximum allowed scope value for the local address 1311 */ 1312 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, 1313 __be32 dst, __be32 local, int scope) 1314 { 1315 __be32 addr = 0; 1316 struct net_device *dev; 1317 1318 if (in_dev) 1319 return confirm_addr_indev(in_dev, dst, local, scope); 1320 /* no interface given: try every device of the namespace, first hit wins */ 1321 rcu_read_lock(); 1322 for_each_netdev_rcu(net, dev) { 1323 in_dev = __in_dev_get_rcu(dev); 1324 if (in_dev) { 1325 addr = confirm_addr_indev(in_dev, dst, local, scope); 1326 if (addr) 1327 break; 1328 } 1329 } 1330 rcu_read_unlock(); 1331 1332 return addr; 1333 } 1334 EXPORT_SYMBOL(inet_confirm_addr); 1335 1336 /* 1337 * Device notifier 1338 */ 1339 /* register_inetaddr_notifier - add @nb to the blocking inetaddr_chain notifier chain. Returns the result of blocking_notifier_chain_register(). */ 1340 int register_inetaddr_notifier(struct notifier_block *nb) 1341 { 1342 return blocking_notifier_chain_register(&inetaddr_chain, nb); 1343 } 1344 EXPORT_SYMBOL(register_inetaddr_notifier); 1345 /* unregister_inetaddr_notifier - remove @nb from the inetaddr_chain notifier chain. Returns the result of blocking_notifier_chain_unregister(). */ 1346 int unregister_inetaddr_notifier(struct notifier_block *nb) 1347 { 1348 return blocking_notifier_chain_unregister(&inetaddr_chain, nb); 1349 } 1350 EXPORT_SYMBOL(unregister_inetaddr_notifier); 1351 1352 /* Rename ifa_labels for a device name change. Make some effort to preserve 1353 * existing alias numbering and to create unique labels if possible. 
1354 */ 1355 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) 1356 { 1357 struct in_ifaddr *ifa; 1358 int named = 0; 1359 1360 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1361 char old[IFNAMSIZ], *dot; 1362 1363 memcpy(old, ifa->ifa_label, IFNAMSIZ); 1364 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); /* the first address takes the bare device name */ 1365 if (named++ == 0) 1366 goto skip; /* later addresses keep the ":suffix" of their old label, or get ":<position>" if the old label had none */ 1367 dot = strchr(old, ':'); 1368 if (!dot) { 1369 sprintf(old, ":%d", named); 1370 dot = old; 1371 } /* when name plus suffix does not fit, the suffix overwrites the tail of the name */ 1372 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) 1373 strcat(ifa->ifa_label, dot); 1374 else 1375 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 1376 skip: 1377 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 1378 } 1379 } 1380 /* inetdev_valid_mtu - true when @mtu is at least 68 bytes, the datagram size every IPv4 module must be able to forward unfragmented (RFC 791). */ 1381 static bool inetdev_valid_mtu(unsigned int mtu) 1382 { 1383 return mtu >= 68; 1384 } 1385 /* inetdev_send_gratuitous_arp - send one ARP request per address on the ifa_list of @in_dev, passing that address as both IP arguments of arp_send() and the device's own hardware address as the source. */ 1386 static void inetdev_send_gratuitous_arp(struct net_device *dev, 1387 struct in_device *in_dev) 1388 1389 { 1390 struct in_ifaddr *ifa; 1391 1392 for (ifa = in_dev->ifa_list; ifa; 1393 ifa = ifa->ifa_next) { 1394 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1395 ifa->ifa_local, dev, 1396 ifa->ifa_local, NULL, 1397 dev->dev_addr, NULL); 1398 } 1399 } 1400 1401 /* Called only under RTNL semaphore */ 1402 /* inetdev_event - netdevice notifier callback that keeps the device's in_device in step with device events. Without an in_device, one is created on NETDEV_REGISTER (loopback additionally gets NOXFRM and NOPOLICY set) or on NETDEV_CHANGEMTU once the MTU is valid again. Returns NOTIFY_DONE, or the errno from inetdev_init() wrapped by notifier_from_errno() when registration fails. */ 1403 static int inetdev_event(struct notifier_block *this, unsigned long event, 1404 void *ptr) 1405 { 1406 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1407 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1408 1409 ASSERT_RTNL(); 1410 1411 if (!in_dev) { 1412 if (event == NETDEV_REGISTER) { 1413 in_dev = inetdev_init(dev); 1414 if (IS_ERR(in_dev)) 1415 return notifier_from_errno(PTR_ERR(in_dev)); 1416 if (dev->flags & IFF_LOOPBACK) { 1417 IN_DEV_CONF_SET(in_dev, NOXFRM, 1); 1418 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); 1419 } 1420 } else if (event == NETDEV_CHANGEMTU) { 1421 /* Re-enabling IP */ 1422 if (inetdev_valid_mtu(dev->mtu)) 1423 in_dev = inetdev_init(dev); 1424 } 1425 goto out; 1426 } 1427 1428 switch (event) { 1429 case NETDEV_REGISTER: 1430 
pr_debug("%s: bug\n", __func__); 1431 RCU_INIT_POINTER(dev->ip_ptr, NULL); 1432 break; 1433 case NETDEV_UP: 1434 if (!inetdev_valid_mtu(dev->mtu)) 1435 break; 1436 if (dev->flags & IFF_LOOPBACK) { /* loopback coming up: install INADDR_LOOPBACK/8 with host scope and infinite lifetime */ 1437 struct in_ifaddr *ifa = inet_alloc_ifa(); 1438 1439 if (ifa) { 1440 INIT_HLIST_NODE(&ifa->hash); 1441 ifa->ifa_local = 1442 ifa->ifa_address = htonl(INADDR_LOOPBACK); 1443 ifa->ifa_prefixlen = 8; 1444 ifa->ifa_mask = inet_make_mask(8); 1445 in_dev_hold(in_dev); 1446 ifa->ifa_dev = in_dev; 1447 ifa->ifa_scope = RT_SCOPE_HOST; 1448 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1449 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, 1450 INFINITY_LIFE_TIME); 1451 ipv4_devconf_setall(in_dev); 1452 neigh_parms_data_state_setall(in_dev->arp_parms); 1453 inet_insert_ifa(ifa); 1454 } 1455 } 1456 ip_mc_up(in_dev); 1457 /* fall through */ 1458 case NETDEV_CHANGEADDR: /* UP and CHANGEADDR only announce when IN_DEV_ARP_NOTIFY() is set */ 1459 if (!IN_DEV_ARP_NOTIFY(in_dev)) 1460 break; 1461 /* fall through */ 1462 case NETDEV_NOTIFY_PEERS: 1463 /* Send gratuitous ARP to notify of link change */ 1464 inetdev_send_gratuitous_arp(dev, in_dev); 1465 break; 1466 case NETDEV_DOWN: 1467 ip_mc_down(in_dev); 1468 break; 1469 case NETDEV_PRE_TYPE_CHANGE: 1470 ip_mc_unmap(in_dev); 1471 break; 1472 case NETDEV_POST_TYPE_CHANGE: 1473 ip_mc_remap(in_dev); 1474 break; 1475 case NETDEV_CHANGEMTU: 1476 if (inetdev_valid_mtu(dev->mtu)) 1477 break; 1478 /* disable IP when MTU is not enough: fall through and destroy the in_device */ 1479 case NETDEV_UNREGISTER: 1480 inetdev_destroy(in_dev); 1481 break; 1482 case NETDEV_CHANGENAME: 1483 /* Do not notify about label change, this event is 1484 * not interesting to applications using netlink. 
1485 */ 1486 inetdev_changename(dev, in_dev); 1487 /* re-register so the sysctl path, which is built from the device name, follows the rename */ 1488 devinet_sysctl_unregister(in_dev); 1489 devinet_sysctl_register(in_dev); 1490 break; 1491 } 1492 out: 1493 return NOTIFY_DONE; 1494 } 1495 /* Notifier block that delivers netdevice events to inetdev_event(). */ 1496 static struct notifier_block ip_netdev_notifier = { 1497 .notifier_call = inetdev_event, 1498 }; 1499 /* inet_nlmsg_size - buffer size for one address message: the ifaddrmsg header plus room for every attribute inet_fill_ifaddr() may add. */ 1500 static size_t inet_nlmsg_size(void) 1501 { 1502 return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) 1503 + nla_total_size(4) /* IFA_ADDRESS */ 1504 + nla_total_size(4) /* IFA_LOCAL */ 1505 + nla_total_size(4) /* IFA_BROADCAST */ 1506 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ 1507 + nla_total_size(4) /* IFA_FLAGS */ 1508 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ 1509 } 1510 /* cstamp_delta - convert a jiffies timestamp into hundredths of a second elapsed since INITIAL_JIFFIES. */ 1511 static inline u32 cstamp_delta(unsigned long cstamp) 1512 { 1513 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; 1514 } 1515 /* put_cacheinfo - append an IFA_CACHEINFO attribute to @skb. @cstamp and @tstamp are jiffies values converted with cstamp_delta(); @preferred and @valid are stored unchanged. Returns the result of nla_put(). */ 1516 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, 1517 unsigned long tstamp, u32 preferred, u32 valid) 1518 { 1519 struct ifa_cacheinfo ci; 1520 1521 ci.cstamp = cstamp_delta(cstamp); 1522 ci.tstamp = cstamp_delta(tstamp); 1523 ci.ifa_prefered = preferred; 1524 ci.ifa_valid = valid; 1525 1526 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); 1527 } 1528 /* inet_fill_ifaddr - build one netlink address message of type @event describing @ifa in @skb. Non-permanent addresses report their remaining preferred and valid lifetimes; permanent ones report INFINITY_LIFE_TIME. Returns 0, or -EMSGSIZE when the header or an attribute does not fit, in which case the partial message is cancelled. */ 1529 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, 1530 u32 portid, u32 seq, int event, unsigned int flags) 1531 { 1532 struct ifaddrmsg *ifm; 1533 struct nlmsghdr *nlh; 1534 u32 preferred, valid; 1535 1536 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); 1537 if (!nlh) 1538 return -EMSGSIZE; 1539 1540 ifm = nlmsg_data(nlh); 1541 ifm->ifa_family = AF_INET; 1542 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1543 ifm->ifa_flags = ifa->ifa_flags; 1544 ifm->ifa_scope = ifa->ifa_scope; 1545 ifm->ifa_index = ifa->ifa_dev->dev->ifindex; 1546 1547 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { 1548 preferred = ifa->ifa_preferred_lft; 1549 valid = ifa->ifa_valid_lft; 1550 if (preferred != INFINITY_LIFE_TIME) { 1551 long tval = (jiffies - 
ifa->ifa_tstamp) / HZ; 1552 /* tval is the number of seconds since ifa_tstamp; subtract it from the lifetimes, clamping at 0 */ 1553 if (preferred > tval) 1554 preferred -= tval; 1555 else 1556 preferred = 0; 1557 if (valid != INFINITY_LIFE_TIME) { 1558 if (valid > tval) 1559 valid -= tval; 1560 else 1561 valid = 0; 1562 } 1563 } 1564 } else { 1565 preferred = INFINITY_LIFE_TIME; 1566 valid = INFINITY_LIFE_TIME; 1567 } /* address, local, broadcast and label attributes are emitted only when set; flags and cacheinfo always */ 1568 if ((ifa->ifa_address && 1569 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) || 1570 (ifa->ifa_local && 1571 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) || 1572 (ifa->ifa_broadcast && 1573 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || 1574 (ifa->ifa_label[0] && 1575 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || 1576 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || 1577 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, 1578 preferred, valid)) 1579 goto nla_put_failure; 1580 1581 nlmsg_end(skb, nlh); 1582 return 0; 1583 1584 nla_put_failure: 1585 nlmsg_cancel(skb, nlh); 1586 return -EMSGSIZE; 1587 } 1588 /* inet_dump_ifaddr - netlink dump callback emitting one RTM_NEWADDR message (NLM_F_MULTI) per address of every device in the namespace. The resume position is carried in cb->args: [0] device hash bucket, [1] device index within the bucket, [2] address index within the device. Returns skb->len. */ 1589 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1590 { 1591 struct net *net = sock_net(skb->sk); 1592 int h, s_h; 1593 int idx, s_idx; 1594 int ip_idx, s_ip_idx; 1595 struct net_device *dev; 1596 struct in_device *in_dev; 1597 struct in_ifaddr *ifa; 1598 struct hlist_head *head; 1599 1600 s_h = cb->args[0]; 1601 s_idx = idx = cb->args[1]; 1602 s_ip_idx = ip_idx = cb->args[2]; 1603 1604 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1605 idx = 0; 1606 head = &net->dev_index_head[h]; 1607 rcu_read_lock(); 1608 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ 1609 net->dev_base_seq; 1610 hlist_for_each_entry_rcu(dev, head, index_hlist) { 1611 if (idx < s_idx) 1612 goto cont; /* the saved address index only applies to the device the previous pass stopped on */ 1613 if (h > s_h || idx > s_idx) 1614 s_ip_idx = 0; 1615 in_dev = __in_dev_get_rcu(dev); 1616 if (!in_dev) 1617 goto cont; 1618 1619 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; 1620 ifa = ifa->ifa_next, ip_idx++) { 1621 if (ip_idx < s_ip_idx) 1622 continue; 1623 if (inet_fill_ifaddr(skb, ifa, 1624 
NETLINK_CB(cb->skb).portid, 1625 cb->nlh->nlmsg_seq, 1626 RTM_NEWADDR, NLM_F_MULTI) < 0) { 1627 rcu_read_unlock(); 1628 goto done; 1629 } 1630 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1631 } 1632 cont: 1633 idx++; 1634 } 1635 rcu_read_unlock(); 1636 } 1637 1638 done: /* save the position for the next invocation of the dump */ 1639 cb->args[0] = h; 1640 cb->args[1] = idx; 1641 cb->args[2] = ip_idx; 1642 1643 return skb->len; 1644 } 1645 /* rtmsg_ifa - send an address notification of type @event for @ifa to the RTNLGRP_IPV4_IFADDR group. @nlh, when non-NULL, supplies the sequence number and is passed on to rtnl_notify(). If the message cannot be allocated or filled, the error is recorded on the group with rtnl_set_sk_err() instead. */ 1646 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, 1647 u32 portid) 1648 { 1649 struct sk_buff *skb; 1650 u32 seq = nlh ? nlh->nlmsg_seq : 0; 1651 int err = -ENOBUFS; 1652 struct net *net; 1653 1654 net = dev_net(ifa->ifa_dev->dev); 1655 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); 1656 if (!skb) 1657 goto errout; 1658 1659 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); 1660 if (err < 0) { 1661 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ 1662 WARN_ON(err == -EMSGSIZE); 1663 kfree_skb(skb); 1664 goto errout; 1665 } 1666 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1667 return; 1668 errout: 1669 if (err < 0) 1670 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); 1671 } 1672 /* inet_get_link_af_size - size of the IFLA_INET_CONF attribute written by inet_fill_link_af(), or 0 when @dev has no in_device. @ext_filter_mask is not used. */ 1673 static size_t inet_get_link_af_size(const struct net_device *dev, 1674 u32 ext_filter_mask) 1675 { 1676 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); 1677 1678 if (!in_dev) 1679 return 0; 1680 1681 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ 1682 } 1683 /* inet_fill_link_af - write all IPV4_DEVCONF_MAX devconf values of @dev into @skb as an array of u32 inside an IFLA_INET_CONF attribute. Returns 0, -ENODATA when the device has no in_device, or -EMSGSIZE when the attribute does not fit. @ext_filter_mask is not used. */ 1684 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev, 1685 u32 ext_filter_mask) 1686 { 1687 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); 1688 struct nlattr *nla; 1689 int i; 1690 1691 if (!in_dev) 1692 return -ENODATA; 1693 1694 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); 1695 if (!nla) 1696 return -EMSGSIZE; 1697 1698 for (i = 0; i < IPV4_DEVCONF_MAX; i++) 1699 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; 1700 1701 return 0; 1702 } 1703 /* Policy for the AF_INET link attribute: IFLA_INET_CONF is declared as a nested attribute. */ 1704 static const struct nla_policy 
inet_af_policy[IFLA_INET_MAX+1] = { 1705 [IFLA_INET_CONF] = { .type = NLA_NESTED }, 1706 }; 1707 1708 static int inet_validate_link_af(const struct net_device *dev, 1709 const struct nlattr *nla) 1710 { 1711 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1712 int err, rem; 1713 1714 if (dev && !__in_dev_get_rtnl(dev)) 1715 return -EAFNOSUPPORT; 1716 1717 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); 1718 if (err < 0) 1719 return err; 1720 1721 if (tb[IFLA_INET_CONF]) { 1722 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { 1723 int cfgid = nla_type(a); 1724 1725 if (nla_len(a) < 4) 1726 return -EINVAL; 1727 1728 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) 1729 return -EINVAL; 1730 } 1731 } 1732 1733 return 0; 1734 } 1735 1736 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) 1737 { 1738 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1739 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1740 int rem; 1741 1742 if (!in_dev) 1743 return -EAFNOSUPPORT; 1744 1745 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) 1746 BUG(); 1747 1748 if (tb[IFLA_INET_CONF]) { 1749 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) 1750 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); 1751 } 1752 1753 return 0; 1754 } 1755 1756 static int inet_netconf_msgsize_devconf(int type) 1757 { 1758 int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) 1759 + nla_total_size(4); /* NETCONFA_IFINDEX */ 1760 bool all = false; 1761 1762 if (type == NETCONFA_ALL) 1763 all = true; 1764 1765 if (all || type == NETCONFA_FORWARDING) 1766 size += nla_total_size(4); 1767 if (all || type == NETCONFA_RP_FILTER) 1768 size += nla_total_size(4); 1769 if (all || type == NETCONFA_MC_FORWARDING) 1770 size += nla_total_size(4); 1771 if (all || type == NETCONFA_PROXY_NEIGH) 1772 size += nla_total_size(4); 1773 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) 1774 size += nla_total_size(4); 1775 1776 return size; 1777 } 1778 1779 static int inet_netconf_fill_devconf(struct 
sk_buff *skb, int ifindex, 1780 struct ipv4_devconf *devconf, u32 portid, 1781 u32 seq, int event, unsigned int flags, 1782 int type) 1783 { 1784 struct nlmsghdr *nlh; 1785 struct netconfmsg *ncm; 1786 bool all = false; 1787 1788 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), 1789 flags); 1790 if (!nlh) 1791 return -EMSGSIZE; 1792 1793 if (type == NETCONFA_ALL) 1794 all = true; 1795 1796 ncm = nlmsg_data(nlh); 1797 ncm->ncm_family = AF_INET; 1798 1799 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) 1800 goto nla_put_failure; 1801 /* each attribute below is emitted when it is the requested type or when all were requested */ 1802 if ((all || type == NETCONFA_FORWARDING) && 1803 nla_put_s32(skb, NETCONFA_FORWARDING, 1804 IPV4_DEVCONF(*devconf, FORWARDING)) < 0) 1805 goto nla_put_failure; 1806 if ((all || type == NETCONFA_RP_FILTER) && 1807 nla_put_s32(skb, NETCONFA_RP_FILTER, 1808 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) 1809 goto nla_put_failure; 1810 if ((all || type == NETCONFA_MC_FORWARDING) && 1811 nla_put_s32(skb, NETCONFA_MC_FORWARDING, 1812 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) 1813 goto nla_put_failure; /* NETCONFA_PROXY_NEIGH reports the PROXY_ARP devconf value */ 1814 if ((all || type == NETCONFA_PROXY_NEIGH) && 1815 nla_put_s32(skb, NETCONFA_PROXY_NEIGH, 1816 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) 1817 goto nla_put_failure; 1818 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && 1819 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 1820 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) 1821 goto nla_put_failure; 1822 1823 nlmsg_end(skb, nlh); 1824 return 0; 1825 1826 nla_put_failure: 1827 nlmsg_cancel(skb, nlh); 1828 return -EMSGSIZE; 1829 } 1830 /* inet_netconf_notify_devconf - send an RTM_NEWNETCONF message for @devconf, tagged with @ifindex and carrying attribute @type, to the RTNLGRP_IPV4_NETCONF group. If the message cannot be allocated or filled, the error is recorded on the group with rtnl_set_sk_err() instead. */ 1831 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, 1832 struct ipv4_devconf *devconf) 1833 { 1834 struct sk_buff *skb; 1835 int err = -ENOBUFS; 1836 1837 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL); 1838 if (!skb) 1839 goto errout; 1840 1841 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, 1842 RTM_NEWNETCONF, 0, type); 1843 if (err < 0) { 1844 /* 
-EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ 1845 WARN_ON(err == -EMSGSIZE); 1846 kfree_skb(skb); 1847 goto errout; 1848 } 1849 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL); 1850 return; 1851 errout: 1852 if (err < 0) 1853 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); 1854 } 1855 /* Attribute policy used when parsing netconf requests; every listed attribute is declared with .len = sizeof(int). */ 1856 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { 1857 [NETCONFA_IFINDEX] = { .len = sizeof(int) }, 1858 [NETCONFA_FORWARDING] = { .len = sizeof(int) }, 1859 [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, 1860 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, 1861 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, 1862 }; 1863 /* inet_netconf_get_devconf - answer a netconf get request. The devconf is selected by the mandatory NETCONFA_IFINDEX attribute: the "all" set, the "default" set, or the in_device of a specific interface. All netconf attributes of that set are then unicast back to the requesting port. Returns the result of rtnl_unicast(), the parse error, -EINVAL when the attribute is missing or the interface or its in_device does not exist, or -ENOBUFS when the reply cannot be allocated. */ 1864 static int inet_netconf_get_devconf(struct sk_buff *in_skb, 1865 struct nlmsghdr *nlh) 1866 { 1867 struct net *net = sock_net(in_skb->sk); 1868 struct nlattr *tb[NETCONFA_MAX+1]; 1869 struct netconfmsg *ncm; 1870 struct sk_buff *skb; 1871 struct ipv4_devconf *devconf; 1872 struct in_device *in_dev; 1873 struct net_device *dev; 1874 int ifindex; 1875 int err; 1876 1877 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, 1878 devconf_ipv4_policy); 1879 if (err < 0) 1880 goto errout; 1881 1882 err = -EINVAL; 1883 if (!tb[NETCONFA_IFINDEX]) 1884 goto errout; 1885 1886 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); 1887 switch (ifindex) { 1888 case NETCONFA_IFINDEX_ALL: 1889 devconf = net->ipv4.devconf_all; 1890 break; 1891 case NETCONFA_IFINDEX_DEFAULT: 1892 devconf = net->ipv4.devconf_dflt; 1893 break; 1894 default: 1895 dev = __dev_get_by_index(net, ifindex); 1896 if (!dev) 1897 goto errout; 1898 in_dev = __in_dev_get_rtnl(dev); 1899 if (!in_dev) 1900 goto errout; 1901 devconf = &in_dev->cnf; 1902 break; 1903 } 1904 1905 err = -ENOBUFS; 1906 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); 1907 if (!skb) 1908 goto errout; 1909 1910 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 1911 NETLINK_CB(in_skb).portid, 1912 nlh->nlmsg_seq, RTM_NEWNETCONF, 0, 1913 
NETCONFA_ALL); 1914 if (err < 0) { 1915 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ 1916 WARN_ON(err == -EMSGSIZE); 1917 kfree_skb(skb); 1918 goto errout; 1919 } 1920 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 1921 errout: 1922 return err; 1923 } 1924 /* inet_netconf_dump_devconf - netlink dump callback emitting one RTM_NEWNETCONF message (NLM_F_MULTI, all attributes) per device that has an in_device, followed by one for the "all" set and one for the "default" set. The resume position is carried in cb->args: [0] is the device hash bucket, with the values NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1 standing for the two trailing entries, and [1] is the device index within the bucket. Returns skb->len. */ 1925 static int inet_netconf_dump_devconf(struct sk_buff *skb, 1926 struct netlink_callback *cb) 1927 { 1928 struct net *net = sock_net(skb->sk); 1929 int h, s_h; 1930 int idx, s_idx; 1931 struct net_device *dev; 1932 struct in_device *in_dev; 1933 struct hlist_head *head; 1934 1935 s_h = cb->args[0]; 1936 s_idx = idx = cb->args[1]; 1937 1938 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1939 idx = 0; 1940 head = &net->dev_index_head[h]; 1941 rcu_read_lock(); 1942 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ 1943 net->dev_base_seq; 1944 hlist_for_each_entry_rcu(dev, head, index_hlist) { 1945 if (idx < s_idx) 1946 goto cont; 1947 in_dev = __in_dev_get_rcu(dev); 1948 if (!in_dev) 1949 goto cont; 1950 1951 if (inet_netconf_fill_devconf(skb, dev->ifindex, 1952 &in_dev->cnf, 1953 NETLINK_CB(cb->skb).portid, 1954 cb->nlh->nlmsg_seq, 1955 RTM_NEWNETCONF, 1956 NLM_F_MULTI, 1957 NETCONFA_ALL) < 0) { 1958 rcu_read_unlock(); 1959 goto done; 1960 } 1961 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1962 cont: 1963 idx++; 1964 } 1965 rcu_read_unlock(); 1966 } /* all devices done: append the "all" entry, advancing h only once it fits */ 1967 if (h == NETDEV_HASHENTRIES) { 1968 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, 1969 net->ipv4.devconf_all, 1970 NETLINK_CB(cb->skb).portid, 1971 cb->nlh->nlmsg_seq, 1972 RTM_NEWNETCONF, NLM_F_MULTI, 1973 NETCONFA_ALL) < 0) 1974 goto done; 1975 else 1976 h++; 1977 } /* then the "default" entry, handled the same way */ 1978 if (h == NETDEV_HASHENTRIES + 1) { 1979 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, 1980 net->ipv4.devconf_dflt, 1981 NETLINK_CB(cb->skb).portid, 1982 cb->nlh->nlmsg_seq, 1983 RTM_NEWNETCONF, NLM_F_MULTI, 1984 NETCONFA_ALL) < 0) 1985 goto done; 1986 else 1987 h++; 1988 } 1989 done: 1990 cb->args[0] = h; 1991 cb->args[1] = idx; 
1992 1993 return skb->len; 1994 } 1995 1996 #ifdef CONFIG_SYSCTL 1997 /* devinet_copy_dflt_conf - copy default value number @i to every device in @net whose state bit @i is clear, i.e. whose own value has not been marked as explicitly set. */ 1998 static void devinet_copy_dflt_conf(struct net *net, int i) 1999 { 2000 struct net_device *dev; 2001 2002 rcu_read_lock(); 2003 for_each_netdev_rcu(net, dev) { 2004 struct in_device *in_dev; 2005 2006 in_dev = __in_dev_get_rcu(dev); 2007 if (in_dev && !test_bit(i, in_dev->cnf.state)) 2008 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; 2009 } 2010 rcu_read_unlock(); 2011 } 2012 2013 /* called with RTNL locked */ /* inet_forward_change - propagate the current "all" FORWARDING value through the namespace: "all" ACCEPT_REDIRECTS becomes its inverse, the "default" set and every device's in_device take the same FORWARDING value, LRO is disabled on each device when forwarding is on, and a NETCONFA_FORWARDING notification is sent for "all", "default" and each device. */ 2014 static void inet_forward_change(struct net *net) 2015 { 2016 struct net_device *dev; 2017 int on = IPV4_DEVCONF_ALL(net, FORWARDING); 2018 2019 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 2020 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 2021 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, 2022 NETCONFA_IFINDEX_ALL, 2023 net->ipv4.devconf_all); 2024 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, 2025 NETCONFA_IFINDEX_DEFAULT, 2026 net->ipv4.devconf_dflt); 2027 2028 for_each_netdev(net, dev) { 2029 struct in_device *in_dev; 2030 2031 if (on) 2032 dev_disable_lro(dev); 2033 2034 in_dev = __in_dev_get_rtnl(dev); 2035 if (in_dev) { 2036 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 2037 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, 2038 dev->ifindex, &in_dev->cnf); 2039 } 2040 } 2041 } 2042 /* devinet_conf_ifindex - map a devconf pointer to the ifindex used in netconf messages: NETCONFA_IFINDEX_DEFAULT or NETCONFA_IFINDEX_ALL for the two namespace-wide sets, otherwise the ifindex of the device whose in_device embeds @cnf. */ 2043 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf) 2044 { 2045 if (cnf == net->ipv4.devconf_dflt) 2046 return NETCONFA_IFINDEX_DEFAULT; 2047 else if (cnf == net->ipv4.devconf_all) 2048 return NETCONFA_IFINDEX_ALL; 2049 else { 2050 struct in_device *idev 2051 = container_of(cnf, struct in_device, cnf); 2052 return idev->dev->ifindex; 2053 } 2054 } 2055 /* devinet_conf_proc - sysctl handler for ordinary integer devconf entries. After proc_dointvec(), a write marks the entry as explicitly set, copies a changed default to devices that have not overridden it, flushes the route cache when accept_local or route_localnet goes from non-zero to zero, and sends a netconf notification when rp_filter, proxy_arp or ignore_routes_with_linkdown changes. Returns the result of proc_dointvec(). */ 2056 static int devinet_conf_proc(struct ctl_table *ctl, int write, 2057 void __user *buffer, 2058 size_t *lenp, loff_t *ppos) 2059 { 2060 int old_value = *(int *)ctl->data; 2061 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 2062 int new_value = *(int *)ctl->data; 2063 2064 if (write) { 
2065 struct ipv4_devconf *cnf = ctl->extra1; 2066 struct net *net = ctl->extra2; /* i is the zero-based index of this entry within cnf->data */ 2067 int i = (int *)ctl->data - cnf->data; 2068 int ifindex; 2069 2070 set_bit(i, cnf->state); 2071 2072 if (cnf == net->ipv4.devconf_dflt) 2073 devinet_copy_dflt_conf(net, i); 2074 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || 2075 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) 2076 if ((new_value == 0) && (old_value != 0)) 2077 rt_cache_flush(net); 2078 2079 if (i == IPV4_DEVCONF_RP_FILTER - 1 && 2080 new_value != old_value) { 2081 ifindex = devinet_conf_ifindex(net, cnf); 2082 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, 2083 ifindex, cnf); 2084 } 2085 if (i == IPV4_DEVCONF_PROXY_ARP - 1 && 2086 new_value != old_value) { 2087 ifindex = devinet_conf_ifindex(net, cnf); 2088 inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, 2089 ifindex, cnf); 2090 } 2091 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && 2092 new_value != old_value) { 2093 ifindex = devinet_conf_ifindex(net, cnf); 2094 inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 2095 ifindex, cnf); 2096 } 2097 } 2098 2099 return ret; 2100 } 2101 /* devinet_sysctl_forward - sysctl handler for the forwarding entries. When a write changes the value: for the "default" entry only a netconf notification is sent; for the "all" entry inet_forward_change() propagates the value; for a per-device entry LRO is disabled if forwarding was turned on and a notification is sent. The "all" and per-device cases run under RTNL and end with a route cache flush; if rtnl_trylock() fails, the old value and file position are restored and the system call is restarted. Returns the result of proc_dointvec() or of restart_syscall(). */ 2102 static int devinet_sysctl_forward(struct ctl_table *ctl, int write, 2103 void __user *buffer, 2104 size_t *lenp, loff_t *ppos) 2105 { 2106 int *valp = ctl->data; 2107 int val = *valp; 2108 loff_t pos = *ppos; 2109 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 2110 2111 if (write && *valp != val) { 2112 struct net *net = ctl->extra2; 2113 2114 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { 2115 if (!rtnl_trylock()) { 2116 /* Restore the original values before restarting */ 2117 *valp = val; 2118 *ppos = pos; 2119 return restart_syscall(); 2120 } 2121 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 2122 inet_forward_change(net); 2123 } else { 2124 struct ipv4_devconf *cnf = ctl->extra1; 2125 struct in_device *idev = 2126 container_of(cnf, struct in_device, cnf); 2127 if (*valp) 2128 dev_disable_lro(idev->dev); 2129 
inet_netconf_notify_devconf(net, 2130 NETCONFA_FORWARDING, 2131 idev->dev->ifindex, 2132 cnf); 2133 } 2134 rtnl_unlock(); 2135 rt_cache_flush(net); 2136 } else 2137 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, 2138 NETCONFA_IFINDEX_DEFAULT, 2139 net->ipv4.devconf_dflt); 2140 } 2141 2142 return ret; 2143 } 2144 /* ipv4_doint_and_flush - sysctl handler that runs proc_dointvec() and flushes the route cache of the owning namespace when a write changed the value. Returns the result of proc_dointvec(). */ 2145 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, 2146 void __user *buffer, 2147 size_t *lenp, loff_t *ppos) 2148 { 2149 int *valp = ctl->data; 2150 int val = *valp; 2151 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 2152 struct net *net = ctl->extra2; 2153 2154 if (write && *valp != val) 2155 rt_cache_flush(net); 2156 2157 return ret; 2158 } 2159 /* Builders for the template sysctl entries. Each entry points into the global ipv4_devconf; __devinet_sysctl_register() rebases .data and .extra1 onto the devconf actually being registered. The RW, RO, COMPLEX and FLUSHING variants differ only in file mode and handler. */ 2160 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ 2161 { \ 2162 .procname = name, \ 2163 .data = ipv4_devconf.data + \ 2164 IPV4_DEVCONF_ ## attr - 1, \ 2165 .maxlen = sizeof(int), \ 2166 .mode = mval, \ 2167 .proc_handler = proc, \ 2168 .extra1 = &ipv4_devconf, \ 2169 } 2170 2171 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ 2172 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) 2173 2174 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ 2175 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) 2176 2177 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ 2178 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) 2179 2180 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ 2181 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) 2182 /* Template sysctl table; __devinet_sysctl_register() duplicates it once per registered devconf. */ 2183 static struct devinet_sysctl_table { 2184 struct ctl_table_header *sysctl_header; 2185 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; 2186 } devinet_sysctl = { 2187 .devinet_vars = { 2188 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", 2189 devinet_sysctl_forward), 2190 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), 2191 2192 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), 2193 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), 2194 
DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), 2195 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), 2196 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), 2197 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, 2198 "accept_source_route"), 2199 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), 2200 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), 2201 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), 2202 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), 2203 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), 2204 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), 2205 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), 2206 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), 2207 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), 2208 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 2209 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 2210 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 2211 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), 2212 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION, 2213 "force_igmp_version"), 2214 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL, 2215 "igmpv2_unsolicited_report_interval"), 2216 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, 2217 "igmpv3_unsolicited_report_interval"), 2218 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, 2219 "ignore_routes_with_linkdown"), 2220 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP, 2221 "drop_gratuitous_arp"), 2222 /* the entries below use ipv4_doint_and_flush, so a changed value also flushes the route cache */ 2223 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 2224 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 2225 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 2226 "promote_secondaries"), 2227 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, 2228 "route_localnet"), 2229 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST, 2230 "drop_unicast_in_l2_multicast"), 2231 }, 2232 }; 2233 /* __devinet_sysctl_register - create the sysctl directory net/ipv4/conf/<dev_name> for devconf @p. The template table is duplicated, each entry is rebased from the global ipv4_devconf onto @p and tagged with @p and @net, the copy is registered and remembered in p->sysctl, and a NETCONFA_ALL notification is sent for @ifindex. Returns 0, or -ENOBUFS when the copy cannot be allocated or registered. */ 2234 static int __devinet_sysctl_register(struct net *net, char *dev_name, 2235 int ifindex, struct ipv4_devconf *p) 2236 { 2237 
int i; 2238 struct devinet_sysctl_table *t; 2239 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; 2240 2241 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); 2242 if (!t) 2243 goto out; 2244 2245 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { 2246 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; 2247 t->devinet_vars[i].extra1 = p; 2248 t->devinet_vars[i].extra2 = net; 2249 } 2250 2251 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name); 2252 2253 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars); 2254 if (!t->sysctl_header) 2255 goto free; 2256 2257 p->sysctl = t; 2258 2259 inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p); 2260 return 0; 2261 2262 free: 2263 kfree(t); 2264 out: 2265 return -ENOBUFS; 2266 } 2267 2268 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) 2269 { 2270 struct devinet_sysctl_table *t = cnf->sysctl; 2271 2272 if (!t) 2273 return; 2274 2275 cnf->sysctl = NULL; 2276 unregister_net_sysctl_table(t->sysctl_header); 2277 kfree(t); 2278 } 2279 2280 static int devinet_sysctl_register(struct in_device *idev) 2281 { 2282 int err; 2283 2284 if (!sysctl_dev_name_is_allowed(idev->dev->name)) 2285 return -EINVAL; 2286 2287 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); 2288 if (err) 2289 return err; 2290 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 2291 idev->dev->ifindex, &idev->cnf); 2292 if (err) 2293 neigh_sysctl_unregister(idev->arp_parms); 2294 return err; 2295 } 2296 2297 static void devinet_sysctl_unregister(struct in_device *idev) 2298 { 2299 __devinet_sysctl_unregister(&idev->cnf); 2300 neigh_sysctl_unregister(idev->arp_parms); 2301 } 2302 2303 static struct ctl_table ctl_forward_entry[] = { 2304 { 2305 .procname = "ip_forward", 2306 .data = &ipv4_devconf.data[ 2307 IPV4_DEVCONF_FORWARDING - 1], 2308 .maxlen = sizeof(int), 2309 .mode = 0644, 2310 .proc_handler = devinet_sysctl_forward, 2311 .extra1 = &ipv4_devconf, 2312 .extra2 = 
&init_net, 2313 }, 2314 { }, 2315 }; 2316 #endif 2317 2318 static __net_init int devinet_init_net(struct net *net) 2319 { 2320 int err; 2321 struct ipv4_devconf *all, *dflt; 2322 #ifdef CONFIG_SYSCTL 2323 struct ctl_table *tbl = ctl_forward_entry; 2324 struct ctl_table_header *forw_hdr; 2325 #endif 2326 2327 err = -ENOMEM; 2328 all = &ipv4_devconf; 2329 dflt = &ipv4_devconf_dflt; 2330 2331 if (!net_eq(net, &init_net)) { 2332 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); 2333 if (!all) 2334 goto err_alloc_all; 2335 2336 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); 2337 if (!dflt) 2338 goto err_alloc_dflt; 2339 2340 #ifdef CONFIG_SYSCTL 2341 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); 2342 if (!tbl) 2343 goto err_alloc_ctl; 2344 2345 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; 2346 tbl[0].extra1 = all; 2347 tbl[0].extra2 = net; 2348 #endif 2349 } 2350 2351 #ifdef CONFIG_SYSCTL 2352 err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all); 2353 if (err < 0) 2354 goto err_reg_all; 2355 2356 err = __devinet_sysctl_register(net, "default", 2357 NETCONFA_IFINDEX_DEFAULT, dflt); 2358 if (err < 0) 2359 goto err_reg_dflt; 2360 2361 err = -ENOMEM; 2362 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl); 2363 if (!forw_hdr) 2364 goto err_reg_ctl; 2365 net->ipv4.forw_hdr = forw_hdr; 2366 #endif 2367 2368 net->ipv4.devconf_all = all; 2369 net->ipv4.devconf_dflt = dflt; 2370 return 0; 2371 2372 #ifdef CONFIG_SYSCTL 2373 err_reg_ctl: 2374 __devinet_sysctl_unregister(dflt); 2375 err_reg_dflt: 2376 __devinet_sysctl_unregister(all); 2377 err_reg_all: 2378 if (tbl != ctl_forward_entry) 2379 kfree(tbl); 2380 err_alloc_ctl: 2381 #endif 2382 if (dflt != &ipv4_devconf_dflt) 2383 kfree(dflt); 2384 err_alloc_dflt: 2385 if (all != &ipv4_devconf) 2386 kfree(all); 2387 err_alloc_all: 2388 return err; 2389 } 2390 2391 static __net_exit void devinet_exit_net(struct net *net) 2392 { 2393 #ifdef CONFIG_SYSCTL 2394 struct ctl_table 
*tbl; 2395 2396 tbl = net->ipv4.forw_hdr->ctl_table_arg; 2397 unregister_net_sysctl_table(net->ipv4.forw_hdr); 2398 __devinet_sysctl_unregister(net->ipv4.devconf_dflt); 2399 __devinet_sysctl_unregister(net->ipv4.devconf_all); 2400 kfree(tbl); 2401 #endif 2402 kfree(net->ipv4.devconf_dflt); 2403 kfree(net->ipv4.devconf_all); 2404 } 2405 2406 static __net_initdata struct pernet_operations devinet_ops = { 2407 .init = devinet_init_net, 2408 .exit = devinet_exit_net, 2409 }; 2410 2411 static struct rtnl_af_ops inet_af_ops __read_mostly = { 2412 .family = AF_INET, 2413 .fill_link_af = inet_fill_link_af, 2414 .get_link_af_size = inet_get_link_af_size, 2415 .validate_link_af = inet_validate_link_af, 2416 .set_link_af = inet_set_link_af, 2417 }; 2418 2419 void __init devinet_init(void) 2420 { 2421 int i; 2422 2423 for (i = 0; i < IN4_ADDR_HSIZE; i++) 2424 INIT_HLIST_HEAD(&inet_addr_lst[i]); 2425 2426 register_pernet_subsys(&devinet_ops); 2427 2428 register_gifconf(PF_INET, inet_gifconf); 2429 register_netdevice_notifier(&ip_netdev_notifier); 2430 2431 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); 2432 2433 rtnl_af_register(&inet_af_ops); 2434 2435 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); 2436 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); 2437 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); 2438 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, 2439 inet_netconf_dump_devconf, NULL); 2440 } 2441