1 /* 2 * NET3 IP device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 13 * 14 * Additional Authors: 15 * Alan Cox, <gw4pts@gw4pts.ampr.org> 16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 17 * 18 * Changes: 19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr 20 * lists. 21 * Cyrus Durgin: updated for kmod 22 * Matthias Andree: in devinet_ioctl, compare label and 23 * address (4.4BSD alias style support), 24 * fall back to comparing just the label 25 * if no match found. 26 */ 27 28 29 #include <linux/uaccess.h> 30 #include <linux/bitops.h> 31 #include <linux/capability.h> 32 #include <linux/module.h> 33 #include <linux/types.h> 34 #include <linux/kernel.h> 35 #include <linux/sched/signal.h> 36 #include <linux/string.h> 37 #include <linux/mm.h> 38 #include <linux/socket.h> 39 #include <linux/sockios.h> 40 #include <linux/in.h> 41 #include <linux/errno.h> 42 #include <linux/interrupt.h> 43 #include <linux/if_addr.h> 44 #include <linux/if_ether.h> 45 #include <linux/inet.h> 46 #include <linux/netdevice.h> 47 #include <linux/etherdevice.h> 48 #include <linux/skbuff.h> 49 #include <linux/init.h> 50 #include <linux/notifier.h> 51 #include <linux/inetdevice.h> 52 #include <linux/igmp.h> 53 #include <linux/slab.h> 54 #include <linux/hash.h> 55 #ifdef CONFIG_SYSCTL 56 #include <linux/sysctl.h> 57 #endif 58 #include <linux/kmod.h> 59 #include <linux/netconf.h> 60 61 #include <net/arp.h> 62 #include <net/ip.h> 63 #include <net/route.h> 64 #include <net/ip_fib.h> 65 #include <net/rtnetlink.h> 66 #include <net/net_namespace.h> 67 #include <net/addrconf.h> 68 69 static struct ipv4_devconf ipv4_devconf = { 70 .data = { 71 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 72 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 73 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 74 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 75 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, 76 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, 77 }, 78 }; 79 80 static struct ipv4_devconf ipv4_devconf_dflt = { 81 .data = { 82 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 83 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 84 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 85 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 86 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, 87 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, 88 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, 89 }, 90 }; 91 92 #define IPV4_DEVCONF_DFLT(net, attr) \ 93 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr) 94 95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { 96 [IFA_LOCAL] = { .type = NLA_U32 }, 97 [IFA_ADDRESS] = { .type = NLA_U32 }, 98 [IFA_BROADCAST] = { .type = NLA_U32 }, 99 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, 100 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, 101 [IFA_FLAGS] = { .type = NLA_U32 }, 102 }; 103 104 #define IN4_ADDR_HSIZE_SHIFT 8 105 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) 106 107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; 108 109 static u32 inet_addr_hash(const struct net *net, __be32 addr) 110 { 111 u32 val = (__force u32) addr ^ net_hash_mix(net); 112 113 return hash_32(val, IN4_ADDR_HSIZE_SHIFT); 114 } 115 116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) 117 { 118 u32 hash = inet_addr_hash(net, ifa->ifa_local); 119 120 ASSERT_RTNL(); 121 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); 122 } 123 124 static void inet_hash_remove(struct in_ifaddr *ifa) 125 { 126 ASSERT_RTNL(); 127 hlist_del_init_rcu(&ifa->hash); 128 } 129 130 /** 131 * __ip_dev_find - find the first device with a given source address. 132 * @net: the net namespace 133 * @addr: the source address 134 * @devref: if true, take a reference on the found device 135 * 136 * If a caller uses devref=false, it should be protected by RCU, or RTNL 137 */ 138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) 139 { 140 u32 hash = inet_addr_hash(net, addr); 141 struct net_device *result = NULL; 142 struct in_ifaddr *ifa; 143 144 rcu_read_lock(); 145 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { 146 if (ifa->ifa_local == addr) { 147 struct net_device *dev = ifa->ifa_dev->dev; 148 149 if (!net_eq(dev_net(dev), net)) 150 continue; 151 result = dev; 152 break; 153 } 154 } 155 if (!result) { 156 struct flowi4 fl4 = { .daddr = addr }; 157 struct fib_result res = { 0 }; 158 struct fib_table *local; 159 160 /* Fallback to FIB local table so that communication 161 * over loopback subnets work. 162 */ 163 local = fib_get_table(net, RT_TABLE_LOCAL); 164 if (local && 165 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && 166 res.type == RTN_LOCAL) 167 result = FIB_RES_DEV(res); 168 } 169 if (result && devref) 170 dev_hold(result); 171 rcu_read_unlock(); 172 return result; 173 } 174 EXPORT_SYMBOL(__ip_dev_find); 175 176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); 177 178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 180 int destroy); 181 #ifdef CONFIG_SYSCTL 182 static int devinet_sysctl_register(struct in_device *idev); 183 static void devinet_sysctl_unregister(struct in_device *idev); 184 #else 185 static int devinet_sysctl_register(struct in_device *idev) 186 { 187 return 0; 188 } 189 static void devinet_sysctl_unregister(struct in_device *idev) 190 { 191 } 192 #endif 193 194 /* Locks all the inet devices. */ 195 196 static struct in_ifaddr *inet_alloc_ifa(void) 197 { 198 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); 199 } 200 201 static void inet_rcu_free_ifa(struct rcu_head *head) 202 { 203 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); 204 if (ifa->ifa_dev) 205 in_dev_put(ifa->ifa_dev); 206 kfree(ifa); 207 } 208 209 static void inet_free_ifa(struct in_ifaddr *ifa) 210 { 211 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); 212 } 213 214 void in_dev_finish_destroy(struct in_device *idev) 215 { 216 struct net_device *dev = idev->dev; 217 218 WARN_ON(idev->ifa_list); 219 WARN_ON(idev->mc_list); 220 kfree(rcu_dereference_protected(idev->mc_hash, 1)); 221 #ifdef NET_REFCNT_DEBUG 222 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); 223 #endif 224 dev_put(dev); 225 if (!idev->dead) 226 pr_err("Freeing alive in_device %p\n", idev); 227 else 228 kfree(idev); 229 } 230 EXPORT_SYMBOL(in_dev_finish_destroy); 231 232 static struct in_device *inetdev_init(struct net_device *dev) 233 { 234 struct in_device *in_dev; 235 int err = -ENOMEM; 236 237 ASSERT_RTNL(); 238 239 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); 240 if (!in_dev) 241 goto out; 242 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, 243 sizeof(in_dev->cnf)); 244 in_dev->cnf.sysctl = NULL; 245 in_dev->dev = dev; 246 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl); 247 if (!in_dev->arp_parms) 248 goto out_kfree; 249 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) 250 dev_disable_lro(dev); 251 /* Reference in_dev->dev */ 252 dev_hold(dev); 253 /* Account for reference dev->ip_ptr (below) */ 254 in_dev_hold(in_dev); 255 256 err = devinet_sysctl_register(in_dev); 257 if (err) { 258 in_dev->dead = 1; 259 in_dev_put(in_dev); 260 in_dev = NULL; 261 goto out; 262 } 263 ip_mc_init_dev(in_dev); 264 if (dev->flags & IFF_UP) 265 ip_mc_up(in_dev); 266 267 /* we can receive as soon as ip_ptr is set -- do this last */ 268 rcu_assign_pointer(dev->ip_ptr, in_dev); 269 out: 270 return in_dev ?: ERR_PTR(err); 271 out_kfree: 272 kfree(in_dev); 273 in_dev = NULL; 274 goto out; 275 } 276 277 static void in_dev_rcu_put(struct rcu_head *head) 278 { 279 struct in_device *idev = container_of(head, struct in_device, rcu_head); 280 in_dev_put(idev); 281 } 282 283 static void inetdev_destroy(struct in_device *in_dev) 284 { 285 struct in_ifaddr *ifa; 286 struct net_device *dev; 287 288 ASSERT_RTNL(); 289 290 dev = in_dev->dev; 291 292 in_dev->dead = 1; 293 294 ip_mc_destroy_dev(in_dev); 295 296 while ((ifa = in_dev->ifa_list) != NULL) { 297 inet_del_ifa(in_dev, &in_dev->ifa_list, 0); 298 inet_free_ifa(ifa); 299 } 300 301 RCU_INIT_POINTER(dev->ip_ptr, NULL); 302 303 devinet_sysctl_unregister(in_dev); 304 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 305 arp_ifdown(dev); 306 307 call_rcu(&in_dev->rcu_head, in_dev_rcu_put); 308 } 309 310 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) 311 { 312 rcu_read_lock(); 313 for_primary_ifa(in_dev) { 314 if (inet_ifa_match(a, ifa)) { 315 if (!b || inet_ifa_match(b, ifa)) { 316 rcu_read_unlock(); 317 return 1; 318 } 319 } 320 } endfor_ifa(in_dev); 321 rcu_read_unlock(); 322 return 0; 323 } 324 325 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 326 int destroy, struct nlmsghdr *nlh, u32 portid) 327 { 328 struct in_ifaddr *promote = NULL; 329 struct in_ifaddr *ifa, *ifa1 = *ifap; 330 struct in_ifaddr *last_prim = in_dev->ifa_list; 331 struct in_ifaddr *prev_prom = NULL; 332 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); 333 334 ASSERT_RTNL(); 335 336 if (in_dev->dead) 337 goto no_promotions; 338 339 /* 1. Deleting primary ifaddr forces deletion all secondaries 340 * unless alias promotion is set 341 **/ 342 343 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { 344 struct in_ifaddr **ifap1 = &ifa1->ifa_next; 345 346 while ((ifa = *ifap1) != NULL) { 347 if (!(ifa->ifa_flags & IFA_F_SECONDARY) && 348 ifa1->ifa_scope <= ifa->ifa_scope) 349 last_prim = ifa; 350 351 if (!(ifa->ifa_flags & IFA_F_SECONDARY) || 352 ifa1->ifa_mask != ifa->ifa_mask || 353 !inet_ifa_match(ifa1->ifa_address, ifa)) { 354 ifap1 = &ifa->ifa_next; 355 prev_prom = ifa; 356 continue; 357 } 358 359 if (!do_promote) { 360 inet_hash_remove(ifa); 361 *ifap1 = ifa->ifa_next; 362 363 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); 364 blocking_notifier_call_chain(&inetaddr_chain, 365 NETDEV_DOWN, ifa); 366 inet_free_ifa(ifa); 367 } else { 368 promote = ifa; 369 break; 370 } 371 } 372 } 373 374 /* On promotion all secondaries from subnet are changing 375 * the primary IP, we must remove all their routes silently 376 * and later to add them back with new prefsrc. Do this 377 * while all addresses are on the device list. 378 */ 379 for (ifa = promote; ifa; ifa = ifa->ifa_next) { 380 if (ifa1->ifa_mask == ifa->ifa_mask && 381 inet_ifa_match(ifa1->ifa_address, ifa)) 382 fib_del_ifaddr(ifa, ifa1); 383 } 384 385 no_promotions: 386 /* 2. Unlink it */ 387 388 *ifap = ifa1->ifa_next; 389 inet_hash_remove(ifa1); 390 391 /* 3. Announce address deletion */ 392 393 /* Send message first, then call notifier. 394 At first sight, FIB update triggered by notifier 395 will refer to already deleted ifaddr, that could confuse 396 netlink listeners. It is not true: look, gated sees 397 that route deleted and if it still thinks that ifaddr 398 is valid, it will try to restore deleted routes... Grr. 399 So that, this order is correct. 400 */ 401 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); 402 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); 403 404 if (promote) { 405 struct in_ifaddr *next_sec = promote->ifa_next; 406 407 if (prev_prom) { 408 prev_prom->ifa_next = promote->ifa_next; 409 promote->ifa_next = last_prim->ifa_next; 410 last_prim->ifa_next = promote; 411 } 412 413 promote->ifa_flags &= ~IFA_F_SECONDARY; 414 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); 415 blocking_notifier_call_chain(&inetaddr_chain, 416 NETDEV_UP, promote); 417 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { 418 if (ifa1->ifa_mask != ifa->ifa_mask || 419 !inet_ifa_match(ifa1->ifa_address, ifa)) 420 continue; 421 fib_add_ifaddr(ifa); 422 } 423 424 } 425 if (destroy) 426 inet_free_ifa(ifa1); 427 } 428 429 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 430 int destroy) 431 { 432 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); 433 } 434 435 static void check_lifetime(struct work_struct *work); 436 437 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); 438 439 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, 440 u32 portid) 441 { 442 struct in_device *in_dev = ifa->ifa_dev; 443 struct in_ifaddr *ifa1, **ifap, **last_primary; 444 445 ASSERT_RTNL(); 446 447 if (!ifa->ifa_local) { 448 inet_free_ifa(ifa); 449 return 0; 450 } 451 452 ifa->ifa_flags &= ~IFA_F_SECONDARY; 453 last_primary = &in_dev->ifa_list; 454 455 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; 456 ifap = &ifa1->ifa_next) { 457 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && 458 ifa->ifa_scope <= ifa1->ifa_scope) 459 last_primary = &ifa1->ifa_next; 460 if (ifa1->ifa_mask == ifa->ifa_mask && 461 inet_ifa_match(ifa1->ifa_address, ifa)) { 462 if (ifa1->ifa_local == ifa->ifa_local) { 463 inet_free_ifa(ifa); 464 return -EEXIST; 465 } 466 if (ifa1->ifa_scope != ifa->ifa_scope) { 467 inet_free_ifa(ifa); 468 return -EINVAL; 469 } 470 ifa->ifa_flags |= IFA_F_SECONDARY; 471 } 472 } 473 474 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) { 475 prandom_seed((__force u32) ifa->ifa_local); 476 ifap = last_primary; 477 } 478 479 ifa->ifa_next = *ifap; 480 *ifap = ifa; 481 482 inet_hash_insert(dev_net(in_dev->dev), ifa); 483 484 cancel_delayed_work(&check_lifetime_work); 485 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); 486 487 /* Send message first, then call notifier. 488 Notifier will trigger FIB update, so that 489 listeners of netlink will know about new ifaddr */ 490 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); 491 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 492 493 return 0; 494 } 495 496 static int inet_insert_ifa(struct in_ifaddr *ifa) 497 { 498 return __inet_insert_ifa(ifa, NULL, 0); 499 } 500 501 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 502 { 503 struct in_device *in_dev = __in_dev_get_rtnl(dev); 504 505 ASSERT_RTNL(); 506 507 if (!in_dev) { 508 inet_free_ifa(ifa); 509 return -ENOBUFS; 510 } 511 ipv4_devconf_setall(in_dev); 512 neigh_parms_data_state_setall(in_dev->arp_parms); 513 if (ifa->ifa_dev != in_dev) { 514 WARN_ON(ifa->ifa_dev); 515 in_dev_hold(in_dev); 516 ifa->ifa_dev = in_dev; 517 } 518 if (ipv4_is_loopback(ifa->ifa_local)) 519 ifa->ifa_scope = RT_SCOPE_HOST; 520 return inet_insert_ifa(ifa); 521 } 522 523 /* Caller must hold RCU or RTNL : 524 * We dont take a reference on found in_device 525 */ 526 struct in_device *inetdev_by_index(struct net *net, int ifindex) 527 { 528 struct net_device *dev; 529 struct in_device *in_dev = NULL; 530 531 rcu_read_lock(); 532 dev = dev_get_by_index_rcu(net, ifindex); 533 if (dev) 534 in_dev = rcu_dereference_rtnl(dev->ip_ptr); 535 rcu_read_unlock(); 536 return in_dev; 537 } 538 EXPORT_SYMBOL(inetdev_by_index); 539 540 /* Called only from RTNL semaphored context. No locks. */ 541 542 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, 543 __be32 mask) 544 { 545 ASSERT_RTNL(); 546 547 for_primary_ifa(in_dev) { 548 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa)) 549 return ifa; 550 } endfor_ifa(in_dev); 551 return NULL; 552 } 553 554 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) 555 { 556 struct ip_mreqn mreq = { 557 .imr_multiaddr.s_addr = ifa->ifa_address, 558 .imr_ifindex = ifa->ifa_dev->dev->ifindex, 559 }; 560 int ret; 561 562 ASSERT_RTNL(); 563 564 lock_sock(sk); 565 if (join) 566 ret = ip_mc_join_group(sk, &mreq); 567 else 568 ret = ip_mc_leave_group(sk, &mreq); 569 release_sock(sk); 570 571 return ret; 572 } 573 574 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) 575 { 576 struct net *net = sock_net(skb->sk); 577 struct nlattr *tb[IFA_MAX+1]; 578 struct in_device *in_dev; 579 struct ifaddrmsg *ifm; 580 struct in_ifaddr *ifa, **ifap; 581 int err = -EINVAL; 582 583 ASSERT_RTNL(); 584 585 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, 586 NULL); 587 if (err < 0) 588 goto errout; 589 590 ifm = nlmsg_data(nlh); 591 in_dev = inetdev_by_index(net, ifm->ifa_index); 592 if (!in_dev) { 593 err = -ENODEV; 594 goto errout; 595 } 596 597 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 598 ifap = &ifa->ifa_next) { 599 if (tb[IFA_LOCAL] && 600 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL])) 601 continue; 602 603 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) 604 continue; 605 606 if (tb[IFA_ADDRESS] && 607 (ifm->ifa_prefixlen != ifa->ifa_prefixlen || 608 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa))) 609 continue; 610 611 if (ipv4_is_multicast(ifa->ifa_address)) 612 ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa); 613 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); 614 return 0; 615 } 616 617 err = -EADDRNOTAVAIL; 618 errout: 619 return err; 620 } 621 622 #define INFINITY_LIFE_TIME 0xFFFFFFFF 623 624 static void check_lifetime(struct work_struct *work) 625 { 626 unsigned long now, next, next_sec, next_sched; 627 struct in_ifaddr *ifa; 628 struct hlist_node *n; 629 int i; 630 631 now = jiffies; 632 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); 633 634 for (i = 0; i < IN4_ADDR_HSIZE; i++) { 635 bool change_needed = false; 636 637 rcu_read_lock(); 638 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { 639 unsigned long age; 640 641 if (ifa->ifa_flags & IFA_F_PERMANENT) 642 continue; 643 644 /* We try to batch several events at once. */ 645 age = (now - ifa->ifa_tstamp + 646 ADDRCONF_TIMER_FUZZ_MINUS) / HZ; 647 648 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && 649 age >= ifa->ifa_valid_lft) { 650 change_needed = true; 651 } else if (ifa->ifa_preferred_lft == 652 INFINITY_LIFE_TIME) { 653 continue; 654 } else if (age >= ifa->ifa_preferred_lft) { 655 if (time_before(ifa->ifa_tstamp + 656 ifa->ifa_valid_lft * HZ, next)) 657 next = ifa->ifa_tstamp + 658 ifa->ifa_valid_lft * HZ; 659 660 if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) 661 change_needed = true; 662 } else if (time_before(ifa->ifa_tstamp + 663 ifa->ifa_preferred_lft * HZ, 664 next)) { 665 next = ifa->ifa_tstamp + 666 ifa->ifa_preferred_lft * HZ; 667 } 668 } 669 rcu_read_unlock(); 670 if (!change_needed) 671 continue; 672 rtnl_lock(); 673 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { 674 unsigned long age; 675 676 if (ifa->ifa_flags & IFA_F_PERMANENT) 677 continue; 678 679 /* We try to batch several events at once. */ 680 age = (now - ifa->ifa_tstamp + 681 ADDRCONF_TIMER_FUZZ_MINUS) / HZ; 682 683 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && 684 age >= ifa->ifa_valid_lft) { 685 struct in_ifaddr **ifap; 686 687 for (ifap = &ifa->ifa_dev->ifa_list; 688 *ifap != NULL; ifap = &(*ifap)->ifa_next) { 689 if (*ifap == ifa) { 690 inet_del_ifa(ifa->ifa_dev, 691 ifap, 1); 692 break; 693 } 694 } 695 } else if (ifa->ifa_preferred_lft != 696 INFINITY_LIFE_TIME && 697 age >= ifa->ifa_preferred_lft && 698 !(ifa->ifa_flags & IFA_F_DEPRECATED)) { 699 ifa->ifa_flags |= IFA_F_DEPRECATED; 700 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 701 } 702 } 703 rtnl_unlock(); 704 } 705 706 next_sec = round_jiffies_up(next); 707 next_sched = next; 708 709 /* If rounded timeout is accurate enough, accept it. */ 710 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) 711 next_sched = next_sec; 712 713 now = jiffies; 714 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ 715 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) 716 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; 717 718 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 719 next_sched - now); 720 } 721 722 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, 723 __u32 prefered_lft) 724 { 725 unsigned long timeout; 726 727 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); 728 729 timeout = addrconf_timeout_fixup(valid_lft, HZ); 730 if (addrconf_finite_timeout(timeout)) 731 ifa->ifa_valid_lft = timeout; 732 else 733 ifa->ifa_flags |= IFA_F_PERMANENT; 734 735 timeout = addrconf_timeout_fixup(prefered_lft, HZ); 736 if (addrconf_finite_timeout(timeout)) { 737 if (timeout == 0) 738 ifa->ifa_flags |= IFA_F_DEPRECATED; 739 ifa->ifa_preferred_lft = timeout; 740 } 741 ifa->ifa_tstamp = jiffies; 742 if (!ifa->ifa_cstamp) 743 ifa->ifa_cstamp = ifa->ifa_tstamp; 744 } 745 746 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, 747 __u32 *pvalid_lft, __u32 *pprefered_lft) 748 { 749 struct nlattr *tb[IFA_MAX+1]; 750 struct in_ifaddr *ifa; 751 struct ifaddrmsg *ifm; 752 struct net_device *dev; 753 struct in_device *in_dev; 754 int err; 755 756 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy, 757 NULL); 758 if (err < 0) 759 goto errout; 760 761 ifm = nlmsg_data(nlh); 762 err = -EINVAL; 763 if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL]) 764 goto errout; 765 766 dev = __dev_get_by_index(net, ifm->ifa_index); 767 err = -ENODEV; 768 if (!dev) 769 goto errout; 770 771 in_dev = __in_dev_get_rtnl(dev); 772 err = -ENOBUFS; 773 if (!in_dev) 774 goto errout; 775 776 ifa = inet_alloc_ifa(); 777 if (!ifa) 778 /* 779 * A potential indev allocation can be left alive, it stays 780 * assigned to its device and is destroy with it. 781 */ 782 goto errout; 783 784 ipv4_devconf_setall(in_dev); 785 neigh_parms_data_state_setall(in_dev->arp_parms); 786 in_dev_hold(in_dev); 787 788 if (!tb[IFA_ADDRESS]) 789 tb[IFA_ADDRESS] = tb[IFA_LOCAL]; 790 791 INIT_HLIST_NODE(&ifa->hash); 792 ifa->ifa_prefixlen = ifm->ifa_prefixlen; 793 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); 794 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : 795 ifm->ifa_flags; 796 ifa->ifa_scope = ifm->ifa_scope; 797 ifa->ifa_dev = in_dev; 798 799 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]); 800 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]); 801 802 if (tb[IFA_BROADCAST]) 803 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]); 804 805 if (tb[IFA_LABEL]) 806 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); 807 else 808 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 809 810 if (tb[IFA_CACHEINFO]) { 811 struct ifa_cacheinfo *ci; 812 813 ci = nla_data(tb[IFA_CACHEINFO]); 814 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { 815 err = -EINVAL; 816 goto errout_free; 817 } 818 *pvalid_lft = ci->ifa_valid; 819 *pprefered_lft = ci->ifa_prefered; 820 } 821 822 return ifa; 823 824 errout_free: 825 inet_free_ifa(ifa); 826 errout: 827 return ERR_PTR(err); 828 } 829 830 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) 831 { 832 struct in_device *in_dev = ifa->ifa_dev; 833 struct in_ifaddr *ifa1, **ifap; 834 835 if (!ifa->ifa_local) 836 return NULL; 837 838 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; 839 ifap = &ifa1->ifa_next) { 840 if (ifa1->ifa_mask == ifa->ifa_mask && 841 inet_ifa_match(ifa1->ifa_address, ifa) && 842 ifa1->ifa_local == ifa->ifa_local) 843 return ifa1; 844 } 845 return NULL; 846 } 847 848 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) 849 { 850 struct net *net = sock_net(skb->sk); 851 struct in_ifaddr *ifa; 852 struct in_ifaddr *ifa_existing; 853 __u32 valid_lft = INFINITY_LIFE_TIME; 854 __u32 prefered_lft = INFINITY_LIFE_TIME; 855 856 ASSERT_RTNL(); 857 858 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft); 859 if (IS_ERR(ifa)) 860 return PTR_ERR(ifa); 861 862 ifa_existing = find_matching_ifa(ifa); 863 if (!ifa_existing) { 864 /* It would be best to check for !NLM_F_CREATE here but 865 * userspace already relies on not having to provide this. 866 */ 867 set_ifa_lifetime(ifa, valid_lft, prefered_lft); 868 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { 869 int ret = ip_mc_config(net->ipv4.mc_autojoin_sk, 870 true, ifa); 871 872 if (ret < 0) { 873 inet_free_ifa(ifa); 874 return ret; 875 } 876 } 877 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); 878 } else { 879 inet_free_ifa(ifa); 880 881 if (nlh->nlmsg_flags & NLM_F_EXCL || 882 !(nlh->nlmsg_flags & NLM_F_REPLACE)) 883 return -EEXIST; 884 ifa = ifa_existing; 885 set_ifa_lifetime(ifa, valid_lft, prefered_lft); 886 cancel_delayed_work(&check_lifetime_work); 887 queue_delayed_work(system_power_efficient_wq, 888 &check_lifetime_work, 0); 889 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); 890 } 891 return 0; 892 } 893 894 /* 895 * Determine a default network mask, based on the IP address. 896 */ 897 898 static int inet_abc_len(__be32 addr) 899 { 900 int rc = -1; /* Something else, probably a multicast. */ 901 902 if (ipv4_is_zeronet(addr)) 903 rc = 0; 904 else { 905 __u32 haddr = ntohl(addr); 906 907 if (IN_CLASSA(haddr)) 908 rc = 8; 909 else if (IN_CLASSB(haddr)) 910 rc = 16; 911 else if (IN_CLASSC(haddr)) 912 rc = 24; 913 } 914 915 return rc; 916 } 917 918 919 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) 920 { 921 struct ifreq ifr; 922 struct sockaddr_in sin_orig; 923 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; 924 struct in_device *in_dev; 925 struct in_ifaddr **ifap = NULL; 926 struct in_ifaddr *ifa = NULL; 927 struct net_device *dev; 928 char *colon; 929 int ret = -EFAULT; 930 int tryaddrmatch = 0; 931 932 /* 933 * Fetch the caller's info block into kernel space 934 */ 935 936 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 937 goto out; 938 ifr.ifr_name[IFNAMSIZ - 1] = 0; 939 940 /* save original address for comparison */ 941 memcpy(&sin_orig, sin, sizeof(*sin)); 942 943 colon = strchr(ifr.ifr_name, ':'); 944 if (colon) 945 *colon = 0; 946 947 dev_load(net, ifr.ifr_name); 948 949 switch (cmd) { 950 case SIOCGIFADDR: /* Get interface address */ 951 case SIOCGIFBRDADDR: /* Get the broadcast address */ 952 case SIOCGIFDSTADDR: /* Get the destination address */ 953 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 954 /* Note that these ioctls will not sleep, 955 so that we do not impose a lock. 956 One day we will be forced to put shlock here (I mean SMP) 957 */ 958 tryaddrmatch = (sin_orig.sin_family == AF_INET); 959 memset(sin, 0, sizeof(*sin)); 960 sin->sin_family = AF_INET; 961 break; 962 963 case SIOCSIFFLAGS: 964 ret = -EPERM; 965 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 966 goto out; 967 break; 968 case SIOCSIFADDR: /* Set interface address (and family) */ 969 case SIOCSIFBRDADDR: /* Set the broadcast address */ 970 case SIOCSIFDSTADDR: /* Set the destination address */ 971 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 972 ret = -EPERM; 973 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 974 goto out; 975 ret = -EINVAL; 976 if (sin->sin_family != AF_INET) 977 goto out; 978 break; 979 default: 980 ret = -EINVAL; 981 goto out; 982 } 983 984 rtnl_lock(); 985 986 ret = -ENODEV; 987 dev = __dev_get_by_name(net, ifr.ifr_name); 988 if (!dev) 989 goto done; 990 991 if (colon) 992 *colon = ':'; 993 994 in_dev = __in_dev_get_rtnl(dev); 995 if (in_dev) { 996 if (tryaddrmatch) { 997 /* Matthias Andree */ 998 /* compare label and address (4.4BSD style) */ 999 /* note: we only do this for a limited set of ioctls 1000 and only if the original address family was AF_INET. 1001 This is checked above. */ 1002 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 1003 ifap = &ifa->ifa_next) { 1004 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 1005 sin_orig.sin_addr.s_addr == 1006 ifa->ifa_local) { 1007 break; /* found */ 1008 } 1009 } 1010 } 1011 /* we didn't get a match, maybe the application is 1012 4.3BSD-style and passed in junk so we fall back to 1013 comparing just the label */ 1014 if (!ifa) { 1015 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 1016 ifap = &ifa->ifa_next) 1017 if (!strcmp(ifr.ifr_name, ifa->ifa_label)) 1018 break; 1019 } 1020 } 1021 1022 ret = -EADDRNOTAVAIL; 1023 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) 1024 goto done; 1025 1026 switch (cmd) { 1027 case SIOCGIFADDR: /* Get interface address */ 1028 sin->sin_addr.s_addr = ifa->ifa_local; 1029 goto rarok; 1030 1031 case SIOCGIFBRDADDR: /* Get the broadcast address */ 1032 sin->sin_addr.s_addr = ifa->ifa_broadcast; 1033 goto rarok; 1034 1035 case SIOCGIFDSTADDR: /* Get the destination address */ 1036 sin->sin_addr.s_addr = ifa->ifa_address; 1037 goto rarok; 1038 1039 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 1040 sin->sin_addr.s_addr = ifa->ifa_mask; 1041 goto rarok; 1042 1043 case SIOCSIFFLAGS: 1044 if (colon) { 1045 ret = -EADDRNOTAVAIL; 1046 if (!ifa) 1047 break; 1048 ret = 0; 1049 if (!(ifr.ifr_flags & IFF_UP)) 1050 inet_del_ifa(in_dev, ifap, 1); 1051 break; 1052 } 1053 ret = dev_change_flags(dev, ifr.ifr_flags); 1054 break; 1055 1056 case SIOCSIFADDR: /* Set interface address (and family) */ 1057 ret = -EINVAL; 1058 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 1059 break; 1060 1061 if (!ifa) { 1062 ret = -ENOBUFS; 1063 ifa = inet_alloc_ifa(); 1064 if (!ifa) 1065 break; 1066 INIT_HLIST_NODE(&ifa->hash); 1067 if (colon) 1068 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); 1069 else 1070 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1071 } else { 1072 ret = 0; 1073 if (ifa->ifa_local == sin->sin_addr.s_addr) 1074 break; 1075 inet_del_ifa(in_dev, ifap, 0); 1076 ifa->ifa_broadcast = 0; 1077 ifa->ifa_scope = 0; 1078 } 1079 1080 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; 1081 1082 if (!(dev->flags & IFF_POINTOPOINT)) { 1083 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); 1084 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); 1085 if ((dev->flags & IFF_BROADCAST) && 1086 ifa->ifa_prefixlen < 31) 1087 ifa->ifa_broadcast = ifa->ifa_address | 1088 ~ifa->ifa_mask; 1089 } else { 1090 ifa->ifa_prefixlen = 32; 1091 ifa->ifa_mask = inet_make_mask(32); 1092 } 1093 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 1094 ret = inet_set_ifa(dev, ifa); 1095 break; 1096 1097 case SIOCSIFBRDADDR: /* Set the broadcast address */ 1098 ret = 0; 1099 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { 1100 inet_del_ifa(in_dev, ifap, 0); 1101 ifa->ifa_broadcast = sin->sin_addr.s_addr; 1102 inet_insert_ifa(ifa); 1103 } 1104 break; 1105 1106 case SIOCSIFDSTADDR: /* Set the destination address */ 1107 ret = 0; 1108 if (ifa->ifa_address == sin->sin_addr.s_addr) 1109 break; 1110 ret = -EINVAL; 1111 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 1112 break; 1113 ret = 0; 1114 inet_del_ifa(in_dev, ifap, 0); 1115 ifa->ifa_address = sin->sin_addr.s_addr; 1116 inet_insert_ifa(ifa); 1117 break; 1118 1119 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 1120 1121 /* 1122 * The mask we set must be legal. 1123 */ 1124 ret = -EINVAL; 1125 if (bad_mask(sin->sin_addr.s_addr, 0)) 1126 break; 1127 ret = 0; 1128 if (ifa->ifa_mask != sin->sin_addr.s_addr) { 1129 __be32 old_mask = ifa->ifa_mask; 1130 inet_del_ifa(in_dev, ifap, 0); 1131 ifa->ifa_mask = sin->sin_addr.s_addr; 1132 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); 1133 1134 /* See if current broadcast address matches 1135 * with current netmask, then recalculate 1136 * the broadcast address. Otherwise it's a 1137 * funny address, so don't touch it since 1138 * the user seems to know what (s)he's doing... 1139 */ 1140 if ((dev->flags & IFF_BROADCAST) && 1141 (ifa->ifa_prefixlen < 31) && 1142 (ifa->ifa_broadcast == 1143 (ifa->ifa_local|~old_mask))) { 1144 ifa->ifa_broadcast = (ifa->ifa_local | 1145 ~sin->sin_addr.s_addr); 1146 } 1147 inet_insert_ifa(ifa); 1148 } 1149 break; 1150 } 1151 done: 1152 rtnl_unlock(); 1153 out: 1154 return ret; 1155 rarok: 1156 rtnl_unlock(); 1157 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0; 1158 goto out; 1159 } 1160 1161 static int inet_gifconf(struct net_device *dev, char __user *buf, int len) 1162 { 1163 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1164 struct in_ifaddr *ifa; 1165 struct ifreq ifr; 1166 int done = 0; 1167 1168 if (!in_dev) 1169 goto out; 1170 1171 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1172 if (!buf) { 1173 done += sizeof(ifr); 1174 continue; 1175 } 1176 if (len < (int) sizeof(ifr)) 1177 break; 1178 memset(&ifr, 0, sizeof(struct ifreq)); 1179 strcpy(ifr.ifr_name, ifa->ifa_label); 1180 1181 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; 1182 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = 1183 ifa->ifa_local; 1184 1185 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { 1186 done = -EFAULT; 1187 break; 1188 } 1189 buf += sizeof(struct ifreq); 1190 len -= sizeof(struct ifreq); 1191 done += sizeof(struct ifreq); 1192 } 1193 out: 1194 return done; 1195 } 1196 1197 static __be32 in_dev_select_addr(const struct in_device *in_dev, 1198 int scope) 1199 { 1200 for_primary_ifa(in_dev) { 1201 if (ifa->ifa_scope != RT_SCOPE_LINK && 1202 ifa->ifa_scope <= scope) 1203 return ifa->ifa_local; 1204 } endfor_ifa(in_dev); 1205 1206 return 0; 1207 } 1208 1209 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) 1210 { 1211 __be32 addr = 0; 1212 struct in_device *in_dev; 1213 struct net *net = dev_net(dev); 1214 int master_idx; 1215 1216 rcu_read_lock(); 1217 in_dev = __in_dev_get_rcu(dev); 1218 if (!in_dev) 1219 goto no_in_dev; 1220 1221 for_primary_ifa(in_dev) { 1222 if (ifa->ifa_scope > scope) 1223 continue; 1224 if (!dst || inet_ifa_match(dst, ifa)) { 1225 addr = ifa->ifa_local; 1226 break; 1227 } 1228 if (!addr) 1229 addr = ifa->ifa_local; 1230 } endfor_ifa(in_dev); 1231 1232 if (addr) 1233 goto out_unlock; 1234 no_in_dev: 1235 master_idx = l3mdev_master_ifindex_rcu(dev); 1236 1237 /* For VRFs, the VRF device takes the place of the loopback device, 1238 * with addresses on it being preferred. Note in such cases the 1239 * loopback device will be among the devices that fail the master_idx 1240 * equality check in the loop below. 1241 */ 1242 if (master_idx && 1243 (dev = dev_get_by_index_rcu(net, master_idx)) && 1244 (in_dev = __in_dev_get_rcu(dev))) { 1245 addr = in_dev_select_addr(in_dev, scope); 1246 if (addr) 1247 goto out_unlock; 1248 } 1249 1250 /* Not loopback addresses on loopback should be preferred 1251 in this case. It is important that lo is the first interface 1252 in dev_base list. 1253 */ 1254 for_each_netdev_rcu(net, dev) { 1255 if (l3mdev_master_ifindex_rcu(dev) != master_idx) 1256 continue; 1257 1258 in_dev = __in_dev_get_rcu(dev); 1259 if (!in_dev) 1260 continue; 1261 1262 addr = in_dev_select_addr(in_dev, scope); 1263 if (addr) 1264 goto out_unlock; 1265 } 1266 out_unlock: 1267 rcu_read_unlock(); 1268 return addr; 1269 } 1270 EXPORT_SYMBOL(inet_select_addr); 1271 1272 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, 1273 __be32 local, int scope) 1274 { 1275 int same = 0; 1276 __be32 addr = 0; 1277 1278 for_ifa(in_dev) { 1279 if (!addr && 1280 (local == ifa->ifa_local || !local) && 1281 ifa->ifa_scope <= scope) { 1282 addr = ifa->ifa_local; 1283 if (same) 1284 break; 1285 } 1286 if (!same) { 1287 same = (!local || inet_ifa_match(local, ifa)) && 1288 (!dst || inet_ifa_match(dst, ifa)); 1289 if (same && addr) { 1290 if (local || !dst) 1291 break; 1292 /* Is the selected addr into dst subnet? */ 1293 if (inet_ifa_match(addr, ifa)) 1294 break; 1295 /* No, then can we use new local src? */ 1296 if (ifa->ifa_scope <= scope) { 1297 addr = ifa->ifa_local; 1298 break; 1299 } 1300 /* search for large dst subnet for addr */ 1301 same = 0; 1302 } 1303 } 1304 } endfor_ifa(in_dev); 1305 1306 return same ? addr : 0; 1307 } 1308 1309 /* 1310 * Confirm that local IP address exists using wildcards: 1311 * - net: netns to check, cannot be NULL 1312 * - in_dev: only on this interface, NULL=any interface 1313 * - dst: only in the same subnet as dst, 0=any dst 1314 * - local: address, 0=autoselect the local address 1315 * - scope: maximum allowed scope value for the local address 1316 */ 1317 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, 1318 __be32 dst, __be32 local, int scope) 1319 { 1320 __be32 addr = 0; 1321 struct net_device *dev; 1322 1323 if (in_dev) 1324 return confirm_addr_indev(in_dev, dst, local, scope); 1325 1326 rcu_read_lock(); 1327 for_each_netdev_rcu(net, dev) { 1328 in_dev = __in_dev_get_rcu(dev); 1329 if (in_dev) { 1330 addr = confirm_addr_indev(in_dev, dst, local, scope); 1331 if (addr) 1332 break; 1333 } 1334 } 1335 rcu_read_unlock(); 1336 1337 return addr; 1338 } 1339 EXPORT_SYMBOL(inet_confirm_addr); 1340 1341 /* 1342 * Device notifier 1343 */ 1344 1345 int register_inetaddr_notifier(struct notifier_block *nb) 1346 { 1347 return blocking_notifier_chain_register(&inetaddr_chain, nb); 1348 } 1349 EXPORT_SYMBOL(register_inetaddr_notifier); 1350 1351 int unregister_inetaddr_notifier(struct notifier_block *nb) 1352 { 1353 return blocking_notifier_chain_unregister(&inetaddr_chain, nb); 1354 } 1355 EXPORT_SYMBOL(unregister_inetaddr_notifier); 1356 1357 /* Rename ifa_labels for a device name change. Make some effort to preserve 1358 * existing alias numbering and to create unique labels if possible. 1359 */ 1360 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) 1361 { 1362 struct in_ifaddr *ifa; 1363 int named = 0; 1364 1365 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1366 char old[IFNAMSIZ], *dot; 1367 1368 memcpy(old, ifa->ifa_label, IFNAMSIZ); 1369 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1370 if (named++ == 0) 1371 goto skip; 1372 dot = strchr(old, ':'); 1373 if (!dot) { 1374 sprintf(old, ":%d", named); 1375 dot = old; 1376 } 1377 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) 1378 strcat(ifa->ifa_label, dot); 1379 else 1380 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 1381 skip: 1382 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 1383 } 1384 } 1385 1386 static bool inetdev_valid_mtu(unsigned int mtu) 1387 { 1388 return mtu >= 68; 1389 } 1390 1391 static void inetdev_send_gratuitous_arp(struct net_device *dev, 1392 struct in_device *in_dev) 1393 1394 { 1395 struct in_ifaddr *ifa; 1396 1397 for (ifa = in_dev->ifa_list; ifa; 1398 ifa = ifa->ifa_next) { 1399 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1400 ifa->ifa_local, dev, 1401 ifa->ifa_local, NULL, 1402 dev->dev_addr, NULL); 1403 } 1404 } 1405 1406 /* Called only under RTNL semaphore */ 1407 1408 static int inetdev_event(struct notifier_block *this, unsigned long event, 1409 void *ptr) 1410 { 1411 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1412 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1413 1414 ASSERT_RTNL(); 1415 1416 if (!in_dev) { 1417 if (event == NETDEV_REGISTER) { 1418 in_dev = inetdev_init(dev); 1419 if (IS_ERR(in_dev)) 1420 return notifier_from_errno(PTR_ERR(in_dev)); 1421 if (dev->flags & IFF_LOOPBACK) { 1422 IN_DEV_CONF_SET(in_dev, NOXFRM, 1); 1423 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); 1424 } 1425 } else if (event == NETDEV_CHANGEMTU) { 1426 /* Re-enabling IP */ 1427 if (inetdev_valid_mtu(dev->mtu)) 1428 in_dev = inetdev_init(dev); 1429 } 1430 goto out; 1431 } 1432 1433 switch (event) { 1434 case NETDEV_REGISTER: 1435 pr_debug("%s: bug\n", __func__); 1436 RCU_INIT_POINTER(dev->ip_ptr, NULL); 1437 break; 1438 case NETDEV_UP: 1439 if (!inetdev_valid_mtu(dev->mtu)) 1440 break; 1441 if (dev->flags & IFF_LOOPBACK) { 1442 struct in_ifaddr *ifa = inet_alloc_ifa(); 1443 1444 if (ifa) { 1445 INIT_HLIST_NODE(&ifa->hash); 1446 ifa->ifa_local = 1447 ifa->ifa_address = htonl(INADDR_LOOPBACK); 1448 ifa->ifa_prefixlen = 8; 1449 ifa->ifa_mask = inet_make_mask(8); 1450 in_dev_hold(in_dev); 1451 ifa->ifa_dev = in_dev; 1452 ifa->ifa_scope = RT_SCOPE_HOST; 1453 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1454 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, 1455 INFINITY_LIFE_TIME); 1456 ipv4_devconf_setall(in_dev); 1457 neigh_parms_data_state_setall(in_dev->arp_parms); 1458 inet_insert_ifa(ifa); 1459 } 1460 } 1461 ip_mc_up(in_dev); 1462 /* fall through */ 1463 case NETDEV_CHANGEADDR: 1464 if (!IN_DEV_ARP_NOTIFY(in_dev)) 1465 break; 1466 /* fall through */ 1467 case NETDEV_NOTIFY_PEERS: 1468 /* Send gratuitous ARP to notify of link change */ 1469 inetdev_send_gratuitous_arp(dev, in_dev); 1470 break; 1471 case NETDEV_DOWN: 1472 ip_mc_down(in_dev); 1473 break; 1474 case NETDEV_PRE_TYPE_CHANGE: 1475 ip_mc_unmap(in_dev); 1476 break; 1477 case NETDEV_POST_TYPE_CHANGE: 1478 ip_mc_remap(in_dev); 1479 break; 1480 case NETDEV_CHANGEMTU: 1481 if (inetdev_valid_mtu(dev->mtu)) 1482 break; 1483 /* disable IP when MTU is not enough */ 1484 case NETDEV_UNREGISTER: 1485 inetdev_destroy(in_dev); 1486 break; 1487 case NETDEV_CHANGENAME: 1488 /* Do not notify about label change, this event is 1489 * not interesting to applications using netlink. 1490 */ 1491 inetdev_changename(dev, in_dev); 1492 1493 devinet_sysctl_unregister(in_dev); 1494 devinet_sysctl_register(in_dev); 1495 break; 1496 } 1497 out: 1498 return NOTIFY_DONE; 1499 } 1500 1501 static struct notifier_block ip_netdev_notifier = { 1502 .notifier_call = inetdev_event, 1503 }; 1504 1505 static size_t inet_nlmsg_size(void) 1506 { 1507 return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) 1508 + nla_total_size(4) /* IFA_ADDRESS */ 1509 + nla_total_size(4) /* IFA_LOCAL */ 1510 + nla_total_size(4) /* IFA_BROADCAST */ 1511 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ 1512 + nla_total_size(4) /* IFA_FLAGS */ 1513 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ 1514 } 1515 1516 static inline u32 cstamp_delta(unsigned long cstamp) 1517 { 1518 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; 1519 } 1520 1521 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, 1522 unsigned long tstamp, u32 preferred, u32 valid) 1523 { 1524 struct ifa_cacheinfo ci; 1525 1526 ci.cstamp = cstamp_delta(cstamp); 1527 ci.tstamp = cstamp_delta(tstamp); 1528 ci.ifa_prefered = preferred; 1529 ci.ifa_valid = valid; 1530 1531 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); 1532 } 1533 1534 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, 1535 u32 portid, u32 seq, int event, unsigned int flags) 1536 { 1537 struct ifaddrmsg *ifm; 1538 struct nlmsghdr *nlh; 1539 u32 preferred, valid; 1540 1541 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); 1542 if (!nlh) 1543 return -EMSGSIZE; 1544 1545 ifm = nlmsg_data(nlh); 1546 ifm->ifa_family = AF_INET; 1547 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1548 ifm->ifa_flags = ifa->ifa_flags; 1549 ifm->ifa_scope = ifa->ifa_scope; 1550 ifm->ifa_index = ifa->ifa_dev->dev->ifindex; 1551 1552 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { 1553 preferred = ifa->ifa_preferred_lft; 1554 valid = ifa->ifa_valid_lft; 1555 if (preferred != INFINITY_LIFE_TIME) { 1556 long tval = (jiffies - ifa->ifa_tstamp) / HZ; 1557 1558 if (preferred > tval) 1559 preferred -= tval; 1560 else 1561 preferred = 0; 1562 if (valid != INFINITY_LIFE_TIME) { 1563 if (valid > tval) 1564 valid -= tval; 1565 else 1566 valid = 0; 1567 } 1568 } 1569 } else { 1570 preferred = INFINITY_LIFE_TIME; 1571 valid = INFINITY_LIFE_TIME; 1572 } 1573 if ((ifa->ifa_address && 1574 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) || 1575 (ifa->ifa_local && 1576 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) || 1577 (ifa->ifa_broadcast && 1578 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) || 1579 (ifa->ifa_label[0] && 1580 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || 1581 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || 1582 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, 1583 preferred, valid)) 1584 goto nla_put_failure; 1585 1586 nlmsg_end(skb, nlh); 1587 return 0; 1588 1589 nla_put_failure: 1590 nlmsg_cancel(skb, nlh); 1591 return -EMSGSIZE; 1592 } 1593 1594 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1595 { 1596 struct net *net = sock_net(skb->sk); 1597 int h, s_h; 1598 int idx, s_idx; 1599 int ip_idx, s_ip_idx; 1600 struct net_device *dev; 1601 struct in_device *in_dev; 1602 struct in_ifaddr *ifa; 1603 struct hlist_head *head; 1604 1605 s_h = cb->args[0]; 1606 s_idx = idx = cb->args[1]; 1607 s_ip_idx = ip_idx = cb->args[2]; 1608 1609 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1610 idx = 0; 1611 head = &net->dev_index_head[h]; 1612 rcu_read_lock(); 1613 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ 1614 net->dev_base_seq; 1615 hlist_for_each_entry_rcu(dev, head, index_hlist) { 1616 if (idx < s_idx) 1617 goto cont; 1618 if (h > s_h || idx > s_idx) 1619 s_ip_idx = 0; 1620 in_dev = __in_dev_get_rcu(dev); 1621 if (!in_dev) 1622 goto cont; 1623 1624 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; 1625 ifa = ifa->ifa_next, ip_idx++) { 1626 if (ip_idx < s_ip_idx) 1627 continue; 1628 if (inet_fill_ifaddr(skb, ifa, 1629 NETLINK_CB(cb->skb).portid, 1630 cb->nlh->nlmsg_seq, 1631 RTM_NEWADDR, NLM_F_MULTI) < 0) { 1632 rcu_read_unlock(); 1633 goto done; 1634 } 1635 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1636 } 1637 cont: 1638 idx++; 1639 } 1640 rcu_read_unlock(); 1641 } 1642 1643 done: 1644 cb->args[0] = h; 1645 cb->args[1] = idx; 1646 cb->args[2] = ip_idx; 1647 1648 return skb->len; 1649 } 1650 1651 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, 1652 u32 portid) 1653 { 1654 struct sk_buff *skb; 1655 u32 seq = nlh ? nlh->nlmsg_seq : 0; 1656 int err = -ENOBUFS; 1657 struct net *net; 1658 1659 net = dev_net(ifa->ifa_dev->dev); 1660 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); 1661 if (!skb) 1662 goto errout; 1663 1664 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); 1665 if (err < 0) { 1666 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ 1667 WARN_ON(err == -EMSGSIZE); 1668 kfree_skb(skb); 1669 goto errout; 1670 } 1671 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1672 return; 1673 errout: 1674 if (err < 0) 1675 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); 1676 } 1677 1678 static size_t inet_get_link_af_size(const struct net_device *dev, 1679 u32 ext_filter_mask) 1680 { 1681 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); 1682 1683 if (!in_dev) 1684 return 0; 1685 1686 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ 1687 } 1688 1689 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev, 1690 u32 ext_filter_mask) 1691 { 1692 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); 1693 struct nlattr *nla; 1694 int i; 1695 1696 if (!in_dev) 1697 return -ENODATA; 1698 1699 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); 1700 if (!nla) 1701 return -EMSGSIZE; 1702 1703 for (i = 0; i < IPV4_DEVCONF_MAX; i++) 1704 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; 1705 1706 return 0; 1707 } 1708 1709 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { 1710 [IFLA_INET_CONF] = { .type = NLA_NESTED }, 1711 }; 1712 1713 static int inet_validate_link_af(const struct net_device *dev, 1714 const struct nlattr *nla) 1715 { 1716 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1717 int err, rem; 1718 1719 if (dev && !__in_dev_get_rtnl(dev)) 1720 return -EAFNOSUPPORT; 1721 1722 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL); 1723 if (err < 0) 1724 return err; 1725 1726 if (tb[IFLA_INET_CONF]) { 1727 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { 1728 int cfgid = nla_type(a); 1729 1730 if (nla_len(a) < 4) 1731 return -EINVAL; 1732 1733 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) 1734 return -EINVAL; 1735 } 1736 } 1737 1738 return 0; 1739 } 1740 1741 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) 1742 { 1743 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1744 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1745 int rem; 1746 1747 if (!in_dev) 1748 return -EAFNOSUPPORT; 1749 1750 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) 1751 BUG(); 1752 1753 if (tb[IFLA_INET_CONF]) { 1754 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) 1755 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); 1756 } 1757 1758 return 0; 1759 } 1760 1761 static int inet_netconf_msgsize_devconf(int type) 1762 { 1763 int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) 1764 + nla_total_size(4); /* NETCONFA_IFINDEX */ 1765 bool all = false; 1766 1767 if (type == NETCONFA_ALL) 1768 all = true; 1769 1770 if (all || type == NETCONFA_FORWARDING) 1771 size += nla_total_size(4); 1772 if (all || type == NETCONFA_RP_FILTER) 1773 size += nla_total_size(4); 1774 if (all || type == NETCONFA_MC_FORWARDING) 1775 size += nla_total_size(4); 1776 if (all || type == NETCONFA_PROXY_NEIGH) 1777 size += nla_total_size(4); 1778 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) 1779 size += nla_total_size(4); 1780 1781 return size; 1782 } 1783 1784 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, 1785 struct ipv4_devconf *devconf, u32 portid, 1786 u32 seq, int event, unsigned int flags, 1787 int type) 1788 { 1789 struct nlmsghdr *nlh; 1790 struct netconfmsg *ncm; 1791 bool all = false; 1792 1793 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), 1794 flags); 1795 if (!nlh) 1796 return -EMSGSIZE; 1797 1798 if (type == NETCONFA_ALL) 1799 all = true; 1800 1801 ncm = nlmsg_data(nlh); 1802 ncm->ncm_family = AF_INET; 1803 1804 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) 1805 goto nla_put_failure; 1806 1807 if (!devconf) 1808 goto out; 1809 1810 if ((all || type == NETCONFA_FORWARDING) && 1811 nla_put_s32(skb, NETCONFA_FORWARDING, 1812 IPV4_DEVCONF(*devconf, FORWARDING)) < 0) 1813 goto nla_put_failure; 1814 if ((all || type == NETCONFA_RP_FILTER) && 1815 nla_put_s32(skb, NETCONFA_RP_FILTER, 1816 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) 1817 goto nla_put_failure; 1818 if ((all || type == NETCONFA_MC_FORWARDING) && 1819 nla_put_s32(skb, NETCONFA_MC_FORWARDING, 1820 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) 1821 goto nla_put_failure; 1822 if ((all || type == NETCONFA_PROXY_NEIGH) && 1823 nla_put_s32(skb, NETCONFA_PROXY_NEIGH, 1824 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) 1825 goto nla_put_failure; 1826 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && 1827 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 1828 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) 1829 goto nla_put_failure; 1830 1831 out: 1832 nlmsg_end(skb, nlh); 1833 return 0; 1834 1835 nla_put_failure: 1836 nlmsg_cancel(skb, nlh); 1837 return -EMSGSIZE; 1838 } 1839 1840 void inet_netconf_notify_devconf(struct net *net, int event, int type, 1841 int ifindex, struct ipv4_devconf *devconf) 1842 { 1843 struct sk_buff *skb; 1844 int err = -ENOBUFS; 1845 1846 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL); 1847 if (!skb) 1848 goto errout; 1849 1850 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, 1851 event, 0, type); 1852 if (err < 0) { 1853 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ 1854 WARN_ON(err == -EMSGSIZE); 1855 kfree_skb(skb); 1856 goto errout; 1857 } 1858 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL); 1859 return; 1860 errout: 1861 if (err < 0) 1862 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); 1863 } 1864 1865 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { 1866 [NETCONFA_IFINDEX] = { .len = sizeof(int) }, 1867 [NETCONFA_FORWARDING] = { .len = sizeof(int) }, 1868 [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, 1869 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, 1870 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, 1871 }; 1872 1873 static int inet_netconf_get_devconf(struct sk_buff *in_skb, 1874 struct nlmsghdr *nlh) 1875 { 1876 struct net *net = sock_net(in_skb->sk); 1877 struct nlattr *tb[NETCONFA_MAX+1]; 1878 struct netconfmsg *ncm; 1879 struct sk_buff *skb; 1880 struct ipv4_devconf *devconf; 1881 struct in_device *in_dev; 1882 struct net_device *dev; 1883 int ifindex; 1884 int err; 1885 1886 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, 1887 devconf_ipv4_policy, NULL); 1888 if (err < 0) 1889 goto errout; 1890 1891 err = -EINVAL; 1892 if (!tb[NETCONFA_IFINDEX]) 1893 goto errout; 1894 1895 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); 1896 switch (ifindex) { 1897 case NETCONFA_IFINDEX_ALL: 1898 devconf = net->ipv4.devconf_all; 1899 break; 1900 case NETCONFA_IFINDEX_DEFAULT: 1901 devconf = net->ipv4.devconf_dflt; 1902 break; 1903 default: 1904 dev = __dev_get_by_index(net, ifindex); 1905 if (!dev) 1906 goto errout; 1907 in_dev = __in_dev_get_rtnl(dev); 1908 if (!in_dev) 1909 goto errout; 1910 devconf = &in_dev->cnf; 1911 break; 1912 } 1913 1914 err = -ENOBUFS; 1915 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); 1916 if (!skb) 1917 goto errout; 1918 1919 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 1920 NETLINK_CB(in_skb).portid, 1921 nlh->nlmsg_seq, RTM_NEWNETCONF, 0, 1922 NETCONFA_ALL); 1923 if (err < 0) { 1924 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ 1925 WARN_ON(err == -EMSGSIZE); 1926 kfree_skb(skb); 1927 goto errout; 1928 } 1929 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 1930 errout: 1931 return err; 1932 } 1933 1934 static int inet_netconf_dump_devconf(struct sk_buff *skb, 1935 struct netlink_callback *cb) 1936 { 1937 struct net *net = sock_net(skb->sk); 1938 int h, s_h; 1939 int idx, s_idx; 1940 struct net_device *dev; 1941 struct in_device *in_dev; 1942 struct hlist_head *head; 1943 1944 s_h = cb->args[0]; 1945 s_idx = idx = cb->args[1]; 1946 1947 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1948 idx = 0; 1949 head = &net->dev_index_head[h]; 1950 rcu_read_lock(); 1951 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ 1952 net->dev_base_seq; 1953 hlist_for_each_entry_rcu(dev, head, index_hlist) { 1954 if (idx < s_idx) 1955 goto cont; 1956 in_dev = __in_dev_get_rcu(dev); 1957 if (!in_dev) 1958 goto cont; 1959 1960 if (inet_netconf_fill_devconf(skb, dev->ifindex, 1961 &in_dev->cnf, 1962 NETLINK_CB(cb->skb).portid, 1963 cb->nlh->nlmsg_seq, 1964 RTM_NEWNETCONF, 1965 NLM_F_MULTI, 1966 NETCONFA_ALL) < 0) { 1967 rcu_read_unlock(); 1968 goto done; 1969 } 1970 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1971 cont: 1972 idx++; 1973 } 1974 rcu_read_unlock(); 1975 } 1976 if (h == NETDEV_HASHENTRIES) { 1977 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, 1978 net->ipv4.devconf_all, 1979 NETLINK_CB(cb->skb).portid, 1980 cb->nlh->nlmsg_seq, 1981 RTM_NEWNETCONF, NLM_F_MULTI, 1982 NETCONFA_ALL) < 0) 1983 goto done; 1984 else 1985 h++; 1986 } 1987 if (h == NETDEV_HASHENTRIES + 1) { 1988 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, 1989 net->ipv4.devconf_dflt, 1990 NETLINK_CB(cb->skb).portid, 1991 cb->nlh->nlmsg_seq, 1992 RTM_NEWNETCONF, NLM_F_MULTI, 1993 NETCONFA_ALL) < 0) 1994 goto done; 1995 else 1996 h++; 1997 } 1998 done: 1999 cb->args[0] = h; 2000 cb->args[1] = idx; 2001 2002 return skb->len; 2003 } 2004 2005 #ifdef CONFIG_SYSCTL 2006 2007 static void devinet_copy_dflt_conf(struct net *net, int i) 2008 { 2009 struct net_device *dev; 2010 2011 rcu_read_lock(); 2012 for_each_netdev_rcu(net, dev) { 2013 struct in_device *in_dev; 2014 2015 in_dev = __in_dev_get_rcu(dev); 2016 if (in_dev && !test_bit(i, in_dev->cnf.state)) 2017 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; 2018 } 2019 rcu_read_unlock(); 2020 } 2021 2022 /* called with RTNL locked */ 2023 static void inet_forward_change(struct net *net) 2024 { 2025 struct net_device *dev; 2026 int on = IPV4_DEVCONF_ALL(net, FORWARDING); 2027 2028 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 2029 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 2030 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2031 NETCONFA_FORWARDING, 2032 NETCONFA_IFINDEX_ALL, 2033 net->ipv4.devconf_all); 2034 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2035 NETCONFA_FORWARDING, 2036 NETCONFA_IFINDEX_DEFAULT, 2037 net->ipv4.devconf_dflt); 2038 2039 for_each_netdev(net, dev) { 2040 struct in_device *in_dev; 2041 2042 if (on) 2043 dev_disable_lro(dev); 2044 2045 in_dev = __in_dev_get_rtnl(dev); 2046 if (in_dev) { 2047 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 2048 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2049 NETCONFA_FORWARDING, 2050 dev->ifindex, &in_dev->cnf); 2051 } 2052 } 2053 } 2054 2055 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf) 2056 { 2057 if (cnf == net->ipv4.devconf_dflt) 2058 return NETCONFA_IFINDEX_DEFAULT; 2059 else if (cnf == net->ipv4.devconf_all) 2060 return NETCONFA_IFINDEX_ALL; 2061 else { 2062 struct in_device *idev 2063 = container_of(cnf, struct in_device, cnf); 2064 return idev->dev->ifindex; 2065 } 2066 } 2067 2068 static int devinet_conf_proc(struct ctl_table *ctl, int write, 2069 void __user *buffer, 2070 size_t *lenp, loff_t *ppos) 2071 { 2072 int old_value = *(int *)ctl->data; 2073 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 2074 int new_value = *(int *)ctl->data; 2075 2076 if (write) { 2077 struct ipv4_devconf *cnf = ctl->extra1; 2078 struct net *net = ctl->extra2; 2079 int i = (int *)ctl->data - cnf->data; 2080 int ifindex; 2081 2082 set_bit(i, cnf->state); 2083 2084 if (cnf == net->ipv4.devconf_dflt) 2085 devinet_copy_dflt_conf(net, i); 2086 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || 2087 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) 2088 if ((new_value == 0) && (old_value != 0)) 2089 rt_cache_flush(net); 2090 2091 if (i == IPV4_DEVCONF_RP_FILTER - 1 && 2092 new_value != old_value) { 2093 ifindex = devinet_conf_ifindex(net, cnf); 2094 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2095 NETCONFA_RP_FILTER, 2096 ifindex, cnf); 2097 } 2098 if (i == IPV4_DEVCONF_PROXY_ARP - 1 && 2099 new_value != old_value) { 2100 ifindex = devinet_conf_ifindex(net, cnf); 2101 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2102 NETCONFA_PROXY_NEIGH, 2103 ifindex, cnf); 2104 } 2105 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && 2106 new_value != old_value) { 2107 ifindex = devinet_conf_ifindex(net, cnf); 2108 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2109 NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 2110 ifindex, cnf); 2111 } 2112 } 2113 2114 return ret; 2115 } 2116 2117 static int devinet_sysctl_forward(struct ctl_table *ctl, int write, 2118 void __user *buffer, 2119 size_t *lenp, loff_t *ppos) 2120 { 2121 int *valp = ctl->data; 2122 int val = *valp; 2123 loff_t pos = *ppos; 2124 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 2125 2126 if (write && *valp != val) { 2127 struct net *net = ctl->extra2; 2128 2129 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { 2130 if (!rtnl_trylock()) { 2131 /* Restore the original values before restarting */ 2132 *valp = val; 2133 *ppos = pos; 2134 return restart_syscall(); 2135 } 2136 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 2137 inet_forward_change(net); 2138 } else { 2139 struct ipv4_devconf *cnf = ctl->extra1; 2140 struct in_device *idev = 2141 container_of(cnf, struct in_device, cnf); 2142 if (*valp) 2143 dev_disable_lro(idev->dev); 2144 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2145 NETCONFA_FORWARDING, 2146 idev->dev->ifindex, 2147 cnf); 2148 } 2149 rtnl_unlock(); 2150 rt_cache_flush(net); 2151 } else 2152 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2153 NETCONFA_FORWARDING, 2154 NETCONFA_IFINDEX_DEFAULT, 2155 net->ipv4.devconf_dflt); 2156 } 2157 2158 return ret; 2159 } 2160 2161 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, 2162 void __user *buffer, 2163 size_t *lenp, loff_t *ppos) 2164 { 2165 int *valp = ctl->data; 2166 int val = *valp; 2167 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 2168 struct net *net = ctl->extra2; 2169 2170 if (write && *valp != val) 2171 rt_cache_flush(net); 2172 2173 return ret; 2174 } 2175 2176 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ 2177 { \ 2178 .procname = name, \ 2179 .data = ipv4_devconf.data + \ 2180 IPV4_DEVCONF_ ## attr - 1, \ 2181 .maxlen = sizeof(int), \ 2182 .mode = mval, \ 2183 .proc_handler = proc, \ 2184 .extra1 = &ipv4_devconf, \ 2185 } 2186 2187 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ 2188 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) 2189 2190 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ 2191 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) 2192 2193 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ 2194 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) 2195 2196 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ 2197 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) 2198 2199 static struct devinet_sysctl_table { 2200 struct ctl_table_header *sysctl_header; 2201 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; 2202 } devinet_sysctl = { 2203 .devinet_vars = { 2204 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", 2205 devinet_sysctl_forward), 2206 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), 2207 2208 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), 2209 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), 2210 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), 2211 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), 2212 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), 2213 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, 2214 "accept_source_route"), 2215 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), 2216 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), 2217 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), 2218 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), 2219 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), 2220 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), 2221 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), 2222 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), 2223 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), 2224 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 2225 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 2226 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 2227 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), 2228 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION, 2229 "force_igmp_version"), 2230 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL, 2231 "igmpv2_unsolicited_report_interval"), 2232 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, 2233 "igmpv3_unsolicited_report_interval"), 2234 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, 2235 "ignore_routes_with_linkdown"), 2236 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP, 2237 "drop_gratuitous_arp"), 2238 2239 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 2240 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 2241 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 2242 "promote_secondaries"), 2243 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, 2244 "route_localnet"), 2245 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST, 2246 "drop_unicast_in_l2_multicast"), 2247 }, 2248 }; 2249 2250 static int __devinet_sysctl_register(struct net *net, char *dev_name, 2251 int ifindex, struct ipv4_devconf *p) 2252 { 2253 int i; 2254 struct devinet_sysctl_table *t; 2255 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; 2256 2257 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); 2258 if (!t) 2259 goto out; 2260 2261 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { 2262 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; 2263 t->devinet_vars[i].extra1 = p; 2264 t->devinet_vars[i].extra2 = net; 2265 } 2266 2267 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name); 2268 2269 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars); 2270 if (!t->sysctl_header) 2271 goto free; 2272 2273 p->sysctl = t; 2274 2275 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, 2276 ifindex, p); 2277 return 0; 2278 2279 free: 2280 kfree(t); 2281 out: 2282 return -ENOBUFS; 2283 } 2284 2285 static void __devinet_sysctl_unregister(struct net *net, 2286 struct ipv4_devconf *cnf, int ifindex) 2287 { 2288 struct devinet_sysctl_table *t = cnf->sysctl; 2289 2290 if (t) { 2291 cnf->sysctl = NULL; 2292 unregister_net_sysctl_table(t->sysctl_header); 2293 kfree(t); 2294 } 2295 2296 inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); 2297 } 2298 2299 static int devinet_sysctl_register(struct in_device *idev) 2300 { 2301 int err; 2302 2303 if (!sysctl_dev_name_is_allowed(idev->dev->name)) 2304 return -EINVAL; 2305 2306 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); 2307 if (err) 2308 return err; 2309 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 2310 idev->dev->ifindex, &idev->cnf); 2311 if (err) 2312 neigh_sysctl_unregister(idev->arp_parms); 2313 return err; 2314 } 2315 2316 static void devinet_sysctl_unregister(struct in_device *idev) 2317 { 2318 struct net *net = dev_net(idev->dev); 2319 2320 __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex); 2321 neigh_sysctl_unregister(idev->arp_parms); 2322 } 2323 2324 static struct ctl_table ctl_forward_entry[] = { 2325 { 2326 .procname = "ip_forward", 2327 .data = &ipv4_devconf.data[ 2328 IPV4_DEVCONF_FORWARDING - 1], 2329 .maxlen = sizeof(int), 2330 .mode = 0644, 2331 .proc_handler = devinet_sysctl_forward, 2332 .extra1 = &ipv4_devconf, 2333 .extra2 = &init_net, 2334 }, 2335 { }, 2336 }; 2337 #endif 2338 2339 static __net_init int devinet_init_net(struct net *net) 2340 { 2341 int err; 2342 struct ipv4_devconf *all, *dflt; 2343 #ifdef CONFIG_SYSCTL 2344 struct ctl_table *tbl = ctl_forward_entry; 2345 struct ctl_table_header *forw_hdr; 2346 #endif 2347 2348 err = -ENOMEM; 2349 all = &ipv4_devconf; 2350 dflt = &ipv4_devconf_dflt; 2351 2352 if (!net_eq(net, &init_net)) { 2353 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); 2354 if (!all) 2355 goto err_alloc_all; 2356 2357 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); 2358 if (!dflt) 2359 goto err_alloc_dflt; 2360 2361 #ifdef CONFIG_SYSCTL 2362 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); 2363 if (!tbl) 2364 goto err_alloc_ctl; 2365 2366 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; 2367 tbl[0].extra1 = all; 2368 tbl[0].extra2 = net; 2369 #endif 2370 } 2371 2372 #ifdef CONFIG_SYSCTL 2373 err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all); 2374 if (err < 0) 2375 goto err_reg_all; 2376 2377 err = __devinet_sysctl_register(net, "default", 2378 NETCONFA_IFINDEX_DEFAULT, dflt); 2379 if (err < 0) 2380 goto err_reg_dflt; 2381 2382 err = -ENOMEM; 2383 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl); 2384 if (!forw_hdr) 2385 goto err_reg_ctl; 2386 net->ipv4.forw_hdr = forw_hdr; 2387 #endif 2388 2389 net->ipv4.devconf_all = all; 2390 net->ipv4.devconf_dflt = dflt; 2391 return 0; 2392 2393 #ifdef CONFIG_SYSCTL 2394 err_reg_ctl: 2395 __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT); 2396 err_reg_dflt: 2397 __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); 2398 err_reg_all: 2399 if (tbl != ctl_forward_entry) 2400 kfree(tbl); 2401 err_alloc_ctl: 2402 #endif 2403 if (dflt != &ipv4_devconf_dflt) 2404 kfree(dflt); 2405 err_alloc_dflt: 2406 if (all != &ipv4_devconf) 2407 kfree(all); 2408 err_alloc_all: 2409 return err; 2410 } 2411 2412 static __net_exit void devinet_exit_net(struct net *net) 2413 { 2414 #ifdef CONFIG_SYSCTL 2415 struct ctl_table *tbl; 2416 2417 tbl = net->ipv4.forw_hdr->ctl_table_arg; 2418 unregister_net_sysctl_table(net->ipv4.forw_hdr); 2419 __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt, 2420 NETCONFA_IFINDEX_DEFAULT); 2421 __devinet_sysctl_unregister(net, net->ipv4.devconf_all, 2422 NETCONFA_IFINDEX_ALL); 2423 kfree(tbl); 2424 #endif 2425 kfree(net->ipv4.devconf_dflt); 2426 kfree(net->ipv4.devconf_all); 2427 } 2428 2429 static __net_initdata struct pernet_operations devinet_ops = { 2430 .init = devinet_init_net, 2431 .exit = devinet_exit_net, 2432 }; 2433 2434 static struct rtnl_af_ops inet_af_ops __read_mostly = { 2435 .family = AF_INET, 2436 .fill_link_af = inet_fill_link_af, 2437 .get_link_af_size = inet_get_link_af_size, 2438 .validate_link_af = inet_validate_link_af, 2439 .set_link_af = inet_set_link_af, 2440 }; 2441 2442 void __init devinet_init(void) 2443 { 2444 int i; 2445 2446 for (i = 0; i < IN4_ADDR_HSIZE; i++) 2447 INIT_HLIST_HEAD(&inet_addr_lst[i]); 2448 2449 register_pernet_subsys(&devinet_ops); 2450 2451 register_gifconf(PF_INET, inet_gifconf); 2452 register_netdevice_notifier(&ip_netdev_notifier); 2453 2454 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); 2455 2456 rtnl_af_register(&inet_af_ops); 2457 2458 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); 2459 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); 2460 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); 2461 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, 2462 inet_netconf_dump_devconf, NULL); 2463 } 2464