1 /* 2 * NET3 IP device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 13 * 14 * Additional Authors: 15 * Alan Cox, <gw4pts@gw4pts.ampr.org> 16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 17 * 18 * Changes: 19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr 20 * lists. 21 * Cyrus Durgin: updated for kmod 22 * Matthias Andree: in devinet_ioctl, compare label and 23 * address (4.4BSD alias style support), 24 * fall back to comparing just the label 25 * if no match found. 26 */ 27 28 29 #include <linux/uaccess.h> 30 #include <linux/bitops.h> 31 #include <linux/capability.h> 32 #include <linux/module.h> 33 #include <linux/types.h> 34 #include <linux/kernel.h> 35 #include <linux/sched/signal.h> 36 #include <linux/string.h> 37 #include <linux/mm.h> 38 #include <linux/socket.h> 39 #include <linux/sockios.h> 40 #include <linux/in.h> 41 #include <linux/errno.h> 42 #include <linux/interrupt.h> 43 #include <linux/if_addr.h> 44 #include <linux/if_ether.h> 45 #include <linux/inet.h> 46 #include <linux/netdevice.h> 47 #include <linux/etherdevice.h> 48 #include <linux/skbuff.h> 49 #include <linux/init.h> 50 #include <linux/notifier.h> 51 #include <linux/inetdevice.h> 52 #include <linux/igmp.h> 53 #include <linux/slab.h> 54 #include <linux/hash.h> 55 #ifdef CONFIG_SYSCTL 56 #include <linux/sysctl.h> 57 #endif 58 #include <linux/kmod.h> 59 #include <linux/netconf.h> 60 61 #include <net/arp.h> 62 #include <net/ip.h> 63 #include <net/route.h> 64 #include <net/ip_fib.h> 65 #include <net/rtnetlink.h> 66 #include <net/net_namespace.h> 67 #include <net/addrconf.h> 68 69 static struct 
ipv4_devconf ipv4_devconf = { 70 .data = { 71 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 72 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 73 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 74 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 75 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, 76 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, 77 }, 78 }; 79 80 static struct ipv4_devconf ipv4_devconf_dflt = { 81 .data = { 82 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 83 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 84 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 85 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 86 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, 87 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, 88 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, 89 }, 90 }; 91 92 #define IPV4_DEVCONF_DFLT(net, attr) \ 93 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr) 94 95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { 96 [IFA_LOCAL] = { .type = NLA_U32 }, 97 [IFA_ADDRESS] = { .type = NLA_U32 }, 98 [IFA_BROADCAST] = { .type = NLA_U32 }, 99 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, 100 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, 101 [IFA_FLAGS] = { .type = NLA_U32 }, 102 }; 103 104 #define IN4_ADDR_HSIZE_SHIFT 8 105 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) 106 107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; 108 109 static u32 inet_addr_hash(const struct net *net, __be32 addr) 110 { 111 u32 val = (__force u32) addr ^ net_hash_mix(net); 112 113 return hash_32(val, IN4_ADDR_HSIZE_SHIFT); 114 } 115 116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) 117 { 118 u32 hash = inet_addr_hash(net, ifa->ifa_local); 119 120 ASSERT_RTNL(); 121 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); 122 } 123 124 static void inet_hash_remove(struct in_ifaddr *ifa) 125 { 126 ASSERT_RTNL(); 127 hlist_del_init_rcu(&ifa->hash); 128 } 129 130 /** 131 * __ip_dev_find - find 
the first device with a given source address. 132 * @net: the net namespace 133 * @addr: the source address 134 * @devref: if true, take a reference on the found device 135 * 136 * If a caller uses devref=false, it should be protected by RCU, or RTNL 137 */ 138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) 139 { 140 u32 hash = inet_addr_hash(net, addr); 141 struct net_device *result = NULL; 142 struct in_ifaddr *ifa; 143 144 rcu_read_lock(); 145 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { 146 if (ifa->ifa_local == addr) { 147 struct net_device *dev = ifa->ifa_dev->dev; 148 149 if (!net_eq(dev_net(dev), net)) 150 continue; 151 result = dev; 152 break; 153 } 154 } 155 if (!result) { 156 struct flowi4 fl4 = { .daddr = addr }; 157 struct fib_result res = { 0 }; 158 struct fib_table *local; 159 160 /* Fallback to FIB local table so that communication 161 * over loopback subnets work. 162 */ 163 local = fib_get_table(net, RT_TABLE_LOCAL); 164 if (local && 165 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && 166 res.type == RTN_LOCAL) 167 result = FIB_RES_DEV(res); 168 } 169 if (result && devref) 170 dev_hold(result); 171 rcu_read_unlock(); 172 return result; 173 } 174 EXPORT_SYMBOL(__ip_dev_find); 175 176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); 177 178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 180 int destroy); 181 #ifdef CONFIG_SYSCTL 182 static int devinet_sysctl_register(struct in_device *idev); 183 static void devinet_sysctl_unregister(struct in_device *idev); 184 #else 185 static int devinet_sysctl_register(struct in_device *idev) 186 { 187 return 0; 188 } 189 static void devinet_sysctl_unregister(struct in_device *idev) 190 { 191 } 192 #endif 193 194 /* Locks all the inet devices. 
*/ 195 196 static struct in_ifaddr *inet_alloc_ifa(void) 197 { 198 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); 199 } 200 201 static void inet_rcu_free_ifa(struct rcu_head *head) 202 { 203 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); 204 if (ifa->ifa_dev) 205 in_dev_put(ifa->ifa_dev); 206 kfree(ifa); 207 } 208 209 static void inet_free_ifa(struct in_ifaddr *ifa) 210 { 211 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); 212 } 213 214 void in_dev_finish_destroy(struct in_device *idev) 215 { 216 struct net_device *dev = idev->dev; 217 218 WARN_ON(idev->ifa_list); 219 WARN_ON(idev->mc_list); 220 kfree(rcu_dereference_protected(idev->mc_hash, 1)); 221 #ifdef NET_REFCNT_DEBUG 222 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); 223 #endif 224 dev_put(dev); 225 if (!idev->dead) 226 pr_err("Freeing alive in_device %p\n", idev); 227 else 228 kfree(idev); 229 } 230 EXPORT_SYMBOL(in_dev_finish_destroy); 231 232 static struct in_device *inetdev_init(struct net_device *dev) 233 { 234 struct in_device *in_dev; 235 int err = -ENOMEM; 236 237 ASSERT_RTNL(); 238 239 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); 240 if (!in_dev) 241 goto out; 242 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, 243 sizeof(in_dev->cnf)); 244 in_dev->cnf.sysctl = NULL; 245 in_dev->dev = dev; 246 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl); 247 if (!in_dev->arp_parms) 248 goto out_kfree; 249 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) 250 dev_disable_lro(dev); 251 /* Reference in_dev->dev */ 252 dev_hold(dev); 253 /* Account for reference dev->ip_ptr (below) */ 254 in_dev_hold(in_dev); 255 256 err = devinet_sysctl_register(in_dev); 257 if (err) { 258 in_dev->dead = 1; 259 in_dev_put(in_dev); 260 in_dev = NULL; 261 goto out; 262 } 263 ip_mc_init_dev(in_dev); 264 if (dev->flags & IFF_UP) 265 ip_mc_up(in_dev); 266 267 /* we can receive as soon as ip_ptr is set -- do this last */ 268 rcu_assign_pointer(dev->ip_ptr, in_dev); 269 out: 270 return 
in_dev ?: ERR_PTR(err); 271 out_kfree: 272 kfree(in_dev); 273 in_dev = NULL; 274 goto out; 275 } 276 277 static void in_dev_rcu_put(struct rcu_head *head) 278 { 279 struct in_device *idev = container_of(head, struct in_device, rcu_head); 280 in_dev_put(idev); 281 } 282 283 static void inetdev_destroy(struct in_device *in_dev) 284 { 285 struct in_ifaddr *ifa; 286 struct net_device *dev; 287 288 ASSERT_RTNL(); 289 290 dev = in_dev->dev; 291 292 in_dev->dead = 1; 293 294 ip_mc_destroy_dev(in_dev); 295 296 while ((ifa = in_dev->ifa_list) != NULL) { 297 inet_del_ifa(in_dev, &in_dev->ifa_list, 0); 298 inet_free_ifa(ifa); 299 } 300 301 RCU_INIT_POINTER(dev->ip_ptr, NULL); 302 303 devinet_sysctl_unregister(in_dev); 304 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 305 arp_ifdown(dev); 306 307 call_rcu(&in_dev->rcu_head, in_dev_rcu_put); 308 } 309 310 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) 311 { 312 rcu_read_lock(); 313 for_primary_ifa(in_dev) { 314 if (inet_ifa_match(a, ifa)) { 315 if (!b || inet_ifa_match(b, ifa)) { 316 rcu_read_unlock(); 317 return 1; 318 } 319 } 320 } endfor_ifa(in_dev); 321 rcu_read_unlock(); 322 return 0; 323 } 324 325 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 326 int destroy, struct nlmsghdr *nlh, u32 portid) 327 { 328 struct in_ifaddr *promote = NULL; 329 struct in_ifaddr *ifa, *ifa1 = *ifap; 330 struct in_ifaddr *last_prim = in_dev->ifa_list; 331 struct in_ifaddr *prev_prom = NULL; 332 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); 333 334 ASSERT_RTNL(); 335 336 if (in_dev->dead) 337 goto no_promotions; 338 339 /* 1. 
Deleting primary ifaddr forces deletion all secondaries 340 * unless alias promotion is set 341 **/ 342 343 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { 344 struct in_ifaddr **ifap1 = &ifa1->ifa_next; 345 346 while ((ifa = *ifap1) != NULL) { 347 if (!(ifa->ifa_flags & IFA_F_SECONDARY) && 348 ifa1->ifa_scope <= ifa->ifa_scope) 349 last_prim = ifa; 350 351 if (!(ifa->ifa_flags & IFA_F_SECONDARY) || 352 ifa1->ifa_mask != ifa->ifa_mask || 353 !inet_ifa_match(ifa1->ifa_address, ifa)) { 354 ifap1 = &ifa->ifa_next; 355 prev_prom = ifa; 356 continue; 357 } 358 359 if (!do_promote) { 360 inet_hash_remove(ifa); 361 *ifap1 = ifa->ifa_next; 362 363 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); 364 blocking_notifier_call_chain(&inetaddr_chain, 365 NETDEV_DOWN, ifa); 366 inet_free_ifa(ifa); 367 } else { 368 promote = ifa; 369 break; 370 } 371 } 372 } 373 374 /* On promotion all secondaries from subnet are changing 375 * the primary IP, we must remove all their routes silently 376 * and later to add them back with new prefsrc. Do this 377 * while all addresses are on the device list. 378 */ 379 for (ifa = promote; ifa; ifa = ifa->ifa_next) { 380 if (ifa1->ifa_mask == ifa->ifa_mask && 381 inet_ifa_match(ifa1->ifa_address, ifa)) 382 fib_del_ifaddr(ifa, ifa1); 383 } 384 385 no_promotions: 386 /* 2. Unlink it */ 387 388 *ifap = ifa1->ifa_next; 389 inet_hash_remove(ifa1); 390 391 /* 3. Announce address deletion */ 392 393 /* Send message first, then call notifier. 394 At first sight, FIB update triggered by notifier 395 will refer to already deleted ifaddr, that could confuse 396 netlink listeners. It is not true: look, gated sees 397 that route deleted and if it still thinks that ifaddr 398 is valid, it will try to restore deleted routes... Grr. 399 So that, this order is correct. 
400 */ 401 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); 402 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); 403 404 if (promote) { 405 struct in_ifaddr *next_sec = promote->ifa_next; 406 407 if (prev_prom) { 408 prev_prom->ifa_next = promote->ifa_next; 409 promote->ifa_next = last_prim->ifa_next; 410 last_prim->ifa_next = promote; 411 } 412 413 promote->ifa_flags &= ~IFA_F_SECONDARY; 414 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); 415 blocking_notifier_call_chain(&inetaddr_chain, 416 NETDEV_UP, promote); 417 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { 418 if (ifa1->ifa_mask != ifa->ifa_mask || 419 !inet_ifa_match(ifa1->ifa_address, ifa)) 420 continue; 421 fib_add_ifaddr(ifa); 422 } 423 424 } 425 if (destroy) 426 inet_free_ifa(ifa1); 427 } 428 429 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 430 int destroy) 431 { 432 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); 433 } 434 435 static void check_lifetime(struct work_struct *work); 436 437 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); 438 439 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, 440 u32 portid) 441 { 442 struct in_device *in_dev = ifa->ifa_dev; 443 struct in_ifaddr *ifa1, **ifap, **last_primary; 444 445 ASSERT_RTNL(); 446 447 if (!ifa->ifa_local) { 448 inet_free_ifa(ifa); 449 return 0; 450 } 451 452 ifa->ifa_flags &= ~IFA_F_SECONDARY; 453 last_primary = &in_dev->ifa_list; 454 455 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; 456 ifap = &ifa1->ifa_next) { 457 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && 458 ifa->ifa_scope <= ifa1->ifa_scope) 459 last_primary = &ifa1->ifa_next; 460 if (ifa1->ifa_mask == ifa->ifa_mask && 461 inet_ifa_match(ifa1->ifa_address, ifa)) { 462 if (ifa1->ifa_local == ifa->ifa_local) { 463 inet_free_ifa(ifa); 464 return -EEXIST; 465 } 466 if (ifa1->ifa_scope != ifa->ifa_scope) { 467 inet_free_ifa(ifa); 468 return -EINVAL; 469 } 470 ifa->ifa_flags |= IFA_F_SECONDARY; 471 } 472 } 
473 474 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) { 475 prandom_seed((__force u32) ifa->ifa_local); 476 ifap = last_primary; 477 } 478 479 ifa->ifa_next = *ifap; 480 *ifap = ifa; 481 482 inet_hash_insert(dev_net(in_dev->dev), ifa); 483 484 cancel_delayed_work(&check_lifetime_work); 485 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); 486 487 /* Send message first, then call notifier. 488 Notifier will trigger FIB update, so that 489 listeners of netlink will know about new ifaddr */ 490 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); 491 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 492 493 return 0; 494 } 495 496 static int inet_insert_ifa(struct in_ifaddr *ifa) 497 { 498 return __inet_insert_ifa(ifa, NULL, 0); 499 } 500 501 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 502 { 503 struct in_device *in_dev = __in_dev_get_rtnl(dev); 504 505 ASSERT_RTNL(); 506 507 if (!in_dev) { 508 inet_free_ifa(ifa); 509 return -ENOBUFS; 510 } 511 ipv4_devconf_setall(in_dev); 512 neigh_parms_data_state_setall(in_dev->arp_parms); 513 if (ifa->ifa_dev != in_dev) { 514 WARN_ON(ifa->ifa_dev); 515 in_dev_hold(in_dev); 516 ifa->ifa_dev = in_dev; 517 } 518 if (ipv4_is_loopback(ifa->ifa_local)) 519 ifa->ifa_scope = RT_SCOPE_HOST; 520 return inet_insert_ifa(ifa); 521 } 522 523 /* Caller must hold RCU or RTNL : 524 * We dont take a reference on found in_device 525 */ 526 struct in_device *inetdev_by_index(struct net *net, int ifindex) 527 { 528 struct net_device *dev; 529 struct in_device *in_dev = NULL; 530 531 rcu_read_lock(); 532 dev = dev_get_by_index_rcu(net, ifindex); 533 if (dev) 534 in_dev = rcu_dereference_rtnl(dev->ip_ptr); 535 rcu_read_unlock(); 536 return in_dev; 537 } 538 EXPORT_SYMBOL(inetdev_by_index); 539 540 /* Called only from RTNL semaphored context. No locks. 
*/ 541 542 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, 543 __be32 mask) 544 { 545 ASSERT_RTNL(); 546 547 for_primary_ifa(in_dev) { 548 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa)) 549 return ifa; 550 } endfor_ifa(in_dev); 551 return NULL; 552 } 553 554 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) 555 { 556 struct ip_mreqn mreq = { 557 .imr_multiaddr.s_addr = ifa->ifa_address, 558 .imr_ifindex = ifa->ifa_dev->dev->ifindex, 559 }; 560 int ret; 561 562 ASSERT_RTNL(); 563 564 lock_sock(sk); 565 if (join) 566 ret = ip_mc_join_group(sk, &mreq); 567 else 568 ret = ip_mc_leave_group(sk, &mreq); 569 release_sock(sk); 570 571 return ret; 572 } 573 574 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) 575 { 576 struct net *net = sock_net(skb->sk); 577 struct nlattr *tb[IFA_MAX+1]; 578 struct in_device *in_dev; 579 struct ifaddrmsg *ifm; 580 struct in_ifaddr *ifa, **ifap; 581 int err = -EINVAL; 582 583 ASSERT_RTNL(); 584 585 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 586 if (err < 0) 587 goto errout; 588 589 ifm = nlmsg_data(nlh); 590 in_dev = inetdev_by_index(net, ifm->ifa_index); 591 if (!in_dev) { 592 err = -ENODEV; 593 goto errout; 594 } 595 596 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 597 ifap = &ifa->ifa_next) { 598 if (tb[IFA_LOCAL] && 599 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL])) 600 continue; 601 602 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) 603 continue; 604 605 if (tb[IFA_ADDRESS] && 606 (ifm->ifa_prefixlen != ifa->ifa_prefixlen || 607 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa))) 608 continue; 609 610 if (ipv4_is_multicast(ifa->ifa_address)) 611 ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa); 612 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); 613 return 0; 614 } 615 616 err = -EADDRNOTAVAIL; 617 errout: 618 return err; 619 } 620 621 #define INFINITY_LIFE_TIME 
0xFFFFFFFF 622 623 static void check_lifetime(struct work_struct *work) 624 { 625 unsigned long now, next, next_sec, next_sched; 626 struct in_ifaddr *ifa; 627 struct hlist_node *n; 628 int i; 629 630 now = jiffies; 631 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); 632 633 for (i = 0; i < IN4_ADDR_HSIZE; i++) { 634 bool change_needed = false; 635 636 rcu_read_lock(); 637 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { 638 unsigned long age; 639 640 if (ifa->ifa_flags & IFA_F_PERMANENT) 641 continue; 642 643 /* We try to batch several events at once. */ 644 age = (now - ifa->ifa_tstamp + 645 ADDRCONF_TIMER_FUZZ_MINUS) / HZ; 646 647 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && 648 age >= ifa->ifa_valid_lft) { 649 change_needed = true; 650 } else if (ifa->ifa_preferred_lft == 651 INFINITY_LIFE_TIME) { 652 continue; 653 } else if (age >= ifa->ifa_preferred_lft) { 654 if (time_before(ifa->ifa_tstamp + 655 ifa->ifa_valid_lft * HZ, next)) 656 next = ifa->ifa_tstamp + 657 ifa->ifa_valid_lft * HZ; 658 659 if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) 660 change_needed = true; 661 } else if (time_before(ifa->ifa_tstamp + 662 ifa->ifa_preferred_lft * HZ, 663 next)) { 664 next = ifa->ifa_tstamp + 665 ifa->ifa_preferred_lft * HZ; 666 } 667 } 668 rcu_read_unlock(); 669 if (!change_needed) 670 continue; 671 rtnl_lock(); 672 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) { 673 unsigned long age; 674 675 if (ifa->ifa_flags & IFA_F_PERMANENT) 676 continue; 677 678 /* We try to batch several events at once. 
*/ 679 age = (now - ifa->ifa_tstamp + 680 ADDRCONF_TIMER_FUZZ_MINUS) / HZ; 681 682 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && 683 age >= ifa->ifa_valid_lft) { 684 struct in_ifaddr **ifap; 685 686 for (ifap = &ifa->ifa_dev->ifa_list; 687 *ifap != NULL; ifap = &(*ifap)->ifa_next) { 688 if (*ifap == ifa) { 689 inet_del_ifa(ifa->ifa_dev, 690 ifap, 1); 691 break; 692 } 693 } 694 } else if (ifa->ifa_preferred_lft != 695 INFINITY_LIFE_TIME && 696 age >= ifa->ifa_preferred_lft && 697 !(ifa->ifa_flags & IFA_F_DEPRECATED)) { 698 ifa->ifa_flags |= IFA_F_DEPRECATED; 699 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 700 } 701 } 702 rtnl_unlock(); 703 } 704 705 next_sec = round_jiffies_up(next); 706 next_sched = next; 707 708 /* If rounded timeout is accurate enough, accept it. */ 709 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ)) 710 next_sched = next_sec; 711 712 now = jiffies; 713 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */ 714 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX)) 715 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX; 716 717 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 718 next_sched - now); 719 } 720 721 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft, 722 __u32 prefered_lft) 723 { 724 unsigned long timeout; 725 726 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); 727 728 timeout = addrconf_timeout_fixup(valid_lft, HZ); 729 if (addrconf_finite_timeout(timeout)) 730 ifa->ifa_valid_lft = timeout; 731 else 732 ifa->ifa_flags |= IFA_F_PERMANENT; 733 734 timeout = addrconf_timeout_fixup(prefered_lft, HZ); 735 if (addrconf_finite_timeout(timeout)) { 736 if (timeout == 0) 737 ifa->ifa_flags |= IFA_F_DEPRECATED; 738 ifa->ifa_preferred_lft = timeout; 739 } 740 ifa->ifa_tstamp = jiffies; 741 if (!ifa->ifa_cstamp) 742 ifa->ifa_cstamp = ifa->ifa_tstamp; 743 } 744 745 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, 746 __u32 *pvalid_lft, __u32 *pprefered_lft) 747 { 748 struct 
nlattr *tb[IFA_MAX+1]; 749 struct in_ifaddr *ifa; 750 struct ifaddrmsg *ifm; 751 struct net_device *dev; 752 struct in_device *in_dev; 753 int err; 754 755 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 756 if (err < 0) 757 goto errout; 758 759 ifm = nlmsg_data(nlh); 760 err = -EINVAL; 761 if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL]) 762 goto errout; 763 764 dev = __dev_get_by_index(net, ifm->ifa_index); 765 err = -ENODEV; 766 if (!dev) 767 goto errout; 768 769 in_dev = __in_dev_get_rtnl(dev); 770 err = -ENOBUFS; 771 if (!in_dev) 772 goto errout; 773 774 ifa = inet_alloc_ifa(); 775 if (!ifa) 776 /* 777 * A potential indev allocation can be left alive, it stays 778 * assigned to its device and is destroy with it. 779 */ 780 goto errout; 781 782 ipv4_devconf_setall(in_dev); 783 neigh_parms_data_state_setall(in_dev->arp_parms); 784 in_dev_hold(in_dev); 785 786 if (!tb[IFA_ADDRESS]) 787 tb[IFA_ADDRESS] = tb[IFA_LOCAL]; 788 789 INIT_HLIST_NODE(&ifa->hash); 790 ifa->ifa_prefixlen = ifm->ifa_prefixlen; 791 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); 792 ifa->ifa_flags = tb[IFA_FLAGS] ? 
nla_get_u32(tb[IFA_FLAGS]) : 793 ifm->ifa_flags; 794 ifa->ifa_scope = ifm->ifa_scope; 795 ifa->ifa_dev = in_dev; 796 797 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]); 798 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]); 799 800 if (tb[IFA_BROADCAST]) 801 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]); 802 803 if (tb[IFA_LABEL]) 804 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); 805 else 806 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 807 808 if (tb[IFA_CACHEINFO]) { 809 struct ifa_cacheinfo *ci; 810 811 ci = nla_data(tb[IFA_CACHEINFO]); 812 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { 813 err = -EINVAL; 814 goto errout_free; 815 } 816 *pvalid_lft = ci->ifa_valid; 817 *pprefered_lft = ci->ifa_prefered; 818 } 819 820 return ifa; 821 822 errout_free: 823 inet_free_ifa(ifa); 824 errout: 825 return ERR_PTR(err); 826 } 827 828 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) 829 { 830 struct in_device *in_dev = ifa->ifa_dev; 831 struct in_ifaddr *ifa1, **ifap; 832 833 if (!ifa->ifa_local) 834 return NULL; 835 836 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; 837 ifap = &ifa1->ifa_next) { 838 if (ifa1->ifa_mask == ifa->ifa_mask && 839 inet_ifa_match(ifa1->ifa_address, ifa) && 840 ifa1->ifa_local == ifa->ifa_local) 841 return ifa1; 842 } 843 return NULL; 844 } 845 846 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) 847 { 848 struct net *net = sock_net(skb->sk); 849 struct in_ifaddr *ifa; 850 struct in_ifaddr *ifa_existing; 851 __u32 valid_lft = INFINITY_LIFE_TIME; 852 __u32 prefered_lft = INFINITY_LIFE_TIME; 853 854 ASSERT_RTNL(); 855 856 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft); 857 if (IS_ERR(ifa)) 858 return PTR_ERR(ifa); 859 860 ifa_existing = find_matching_ifa(ifa); 861 if (!ifa_existing) { 862 /* It would be best to check for !NLM_F_CREATE here but 863 * userspace already relies on not having to provide this. 
864 */ 865 set_ifa_lifetime(ifa, valid_lft, prefered_lft); 866 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) { 867 int ret = ip_mc_config(net->ipv4.mc_autojoin_sk, 868 true, ifa); 869 870 if (ret < 0) { 871 inet_free_ifa(ifa); 872 return ret; 873 } 874 } 875 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); 876 } else { 877 inet_free_ifa(ifa); 878 879 if (nlh->nlmsg_flags & NLM_F_EXCL || 880 !(nlh->nlmsg_flags & NLM_F_REPLACE)) 881 return -EEXIST; 882 ifa = ifa_existing; 883 set_ifa_lifetime(ifa, valid_lft, prefered_lft); 884 cancel_delayed_work(&check_lifetime_work); 885 queue_delayed_work(system_power_efficient_wq, 886 &check_lifetime_work, 0); 887 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); 888 } 889 return 0; 890 } 891 892 /* 893 * Determine a default network mask, based on the IP address. 894 */ 895 896 static int inet_abc_len(__be32 addr) 897 { 898 int rc = -1; /* Something else, probably a multicast. */ 899 900 if (ipv4_is_zeronet(addr)) 901 rc = 0; 902 else { 903 __u32 haddr = ntohl(addr); 904 905 if (IN_CLASSA(haddr)) 906 rc = 8; 907 else if (IN_CLASSB(haddr)) 908 rc = 16; 909 else if (IN_CLASSC(haddr)) 910 rc = 24; 911 } 912 913 return rc; 914 } 915 916 917 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) 918 { 919 struct ifreq ifr; 920 struct sockaddr_in sin_orig; 921 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; 922 struct in_device *in_dev; 923 struct in_ifaddr **ifap = NULL; 924 struct in_ifaddr *ifa = NULL; 925 struct net_device *dev; 926 char *colon; 927 int ret = -EFAULT; 928 int tryaddrmatch = 0; 929 930 /* 931 * Fetch the caller's info block into kernel space 932 */ 933 934 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 935 goto out; 936 ifr.ifr_name[IFNAMSIZ - 1] = 0; 937 938 /* save original address for comparison */ 939 memcpy(&sin_orig, sin, sizeof(*sin)); 940 941 colon = strchr(ifr.ifr_name, ':'); 942 if (colon) 943 *colon = 0; 944 945 dev_load(net, ifr.ifr_name); 946 947 switch 
(cmd) { 948 case SIOCGIFADDR: /* Get interface address */ 949 case SIOCGIFBRDADDR: /* Get the broadcast address */ 950 case SIOCGIFDSTADDR: /* Get the destination address */ 951 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 952 /* Note that these ioctls will not sleep, 953 so that we do not impose a lock. 954 One day we will be forced to put shlock here (I mean SMP) 955 */ 956 tryaddrmatch = (sin_orig.sin_family == AF_INET); 957 memset(sin, 0, sizeof(*sin)); 958 sin->sin_family = AF_INET; 959 break; 960 961 case SIOCSIFFLAGS: 962 ret = -EPERM; 963 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 964 goto out; 965 break; 966 case SIOCSIFADDR: /* Set interface address (and family) */ 967 case SIOCSIFBRDADDR: /* Set the broadcast address */ 968 case SIOCSIFDSTADDR: /* Set the destination address */ 969 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 970 ret = -EPERM; 971 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 972 goto out; 973 ret = -EINVAL; 974 if (sin->sin_family != AF_INET) 975 goto out; 976 break; 977 default: 978 ret = -EINVAL; 979 goto out; 980 } 981 982 rtnl_lock(); 983 984 ret = -ENODEV; 985 dev = __dev_get_by_name(net, ifr.ifr_name); 986 if (!dev) 987 goto done; 988 989 if (colon) 990 *colon = ':'; 991 992 in_dev = __in_dev_get_rtnl(dev); 993 if (in_dev) { 994 if (tryaddrmatch) { 995 /* Matthias Andree */ 996 /* compare label and address (4.4BSD style) */ 997 /* note: we only do this for a limited set of ioctls 998 and only if the original address family was AF_INET. 999 This is checked above. 
*/ 1000 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 1001 ifap = &ifa->ifa_next) { 1002 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 1003 sin_orig.sin_addr.s_addr == 1004 ifa->ifa_local) { 1005 break; /* found */ 1006 } 1007 } 1008 } 1009 /* we didn't get a match, maybe the application is 1010 4.3BSD-style and passed in junk so we fall back to 1011 comparing just the label */ 1012 if (!ifa) { 1013 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 1014 ifap = &ifa->ifa_next) 1015 if (!strcmp(ifr.ifr_name, ifa->ifa_label)) 1016 break; 1017 } 1018 } 1019 1020 ret = -EADDRNOTAVAIL; 1021 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) 1022 goto done; 1023 1024 switch (cmd) { 1025 case SIOCGIFADDR: /* Get interface address */ 1026 sin->sin_addr.s_addr = ifa->ifa_local; 1027 goto rarok; 1028 1029 case SIOCGIFBRDADDR: /* Get the broadcast address */ 1030 sin->sin_addr.s_addr = ifa->ifa_broadcast; 1031 goto rarok; 1032 1033 case SIOCGIFDSTADDR: /* Get the destination address */ 1034 sin->sin_addr.s_addr = ifa->ifa_address; 1035 goto rarok; 1036 1037 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 1038 sin->sin_addr.s_addr = ifa->ifa_mask; 1039 goto rarok; 1040 1041 case SIOCSIFFLAGS: 1042 if (colon) { 1043 ret = -EADDRNOTAVAIL; 1044 if (!ifa) 1045 break; 1046 ret = 0; 1047 if (!(ifr.ifr_flags & IFF_UP)) 1048 inet_del_ifa(in_dev, ifap, 1); 1049 break; 1050 } 1051 ret = dev_change_flags(dev, ifr.ifr_flags); 1052 break; 1053 1054 case SIOCSIFADDR: /* Set interface address (and family) */ 1055 ret = -EINVAL; 1056 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 1057 break; 1058 1059 if (!ifa) { 1060 ret = -ENOBUFS; 1061 ifa = inet_alloc_ifa(); 1062 if (!ifa) 1063 break; 1064 INIT_HLIST_NODE(&ifa->hash); 1065 if (colon) 1066 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); 1067 else 1068 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1069 } else { 1070 ret = 0; 1071 if (ifa->ifa_local == sin->sin_addr.s_addr) 1072 break; 1073 inet_del_ifa(in_dev, ifap, 
0); 1074 ifa->ifa_broadcast = 0; 1075 ifa->ifa_scope = 0; 1076 } 1077 1078 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; 1079 1080 if (!(dev->flags & IFF_POINTOPOINT)) { 1081 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); 1082 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); 1083 if ((dev->flags & IFF_BROADCAST) && 1084 ifa->ifa_prefixlen < 31) 1085 ifa->ifa_broadcast = ifa->ifa_address | 1086 ~ifa->ifa_mask; 1087 } else { 1088 ifa->ifa_prefixlen = 32; 1089 ifa->ifa_mask = inet_make_mask(32); 1090 } 1091 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); 1092 ret = inet_set_ifa(dev, ifa); 1093 break; 1094 1095 case SIOCSIFBRDADDR: /* Set the broadcast address */ 1096 ret = 0; 1097 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { 1098 inet_del_ifa(in_dev, ifap, 0); 1099 ifa->ifa_broadcast = sin->sin_addr.s_addr; 1100 inet_insert_ifa(ifa); 1101 } 1102 break; 1103 1104 case SIOCSIFDSTADDR: /* Set the destination address */ 1105 ret = 0; 1106 if (ifa->ifa_address == sin->sin_addr.s_addr) 1107 break; 1108 ret = -EINVAL; 1109 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 1110 break; 1111 ret = 0; 1112 inet_del_ifa(in_dev, ifap, 0); 1113 ifa->ifa_address = sin->sin_addr.s_addr; 1114 inet_insert_ifa(ifa); 1115 break; 1116 1117 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 1118 1119 /* 1120 * The mask we set must be legal. 1121 */ 1122 ret = -EINVAL; 1123 if (bad_mask(sin->sin_addr.s_addr, 0)) 1124 break; 1125 ret = 0; 1126 if (ifa->ifa_mask != sin->sin_addr.s_addr) { 1127 __be32 old_mask = ifa->ifa_mask; 1128 inet_del_ifa(in_dev, ifap, 0); 1129 ifa->ifa_mask = sin->sin_addr.s_addr; 1130 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); 1131 1132 /* See if current broadcast address matches 1133 * with current netmask, then recalculate 1134 * the broadcast address. Otherwise it's a 1135 * funny address, so don't touch it since 1136 * the user seems to know what (s)he's doing... 
1137 */ 1138 if ((dev->flags & IFF_BROADCAST) && 1139 (ifa->ifa_prefixlen < 31) && 1140 (ifa->ifa_broadcast == 1141 (ifa->ifa_local|~old_mask))) { 1142 ifa->ifa_broadcast = (ifa->ifa_local | 1143 ~sin->sin_addr.s_addr); 1144 } 1145 inet_insert_ifa(ifa); 1146 } 1147 break; 1148 } 1149 done: 1150 rtnl_unlock(); 1151 out: 1152 return ret; 1153 rarok: 1154 rtnl_unlock(); 1155 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0; 1156 goto out; 1157 } 1158 1159 static int inet_gifconf(struct net_device *dev, char __user *buf, int len) 1160 { 1161 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1162 struct in_ifaddr *ifa; 1163 struct ifreq ifr; 1164 int done = 0; 1165 1166 if (!in_dev) 1167 goto out; 1168 1169 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1170 if (!buf) { 1171 done += sizeof(ifr); 1172 continue; 1173 } 1174 if (len < (int) sizeof(ifr)) 1175 break; 1176 memset(&ifr, 0, sizeof(struct ifreq)); 1177 strcpy(ifr.ifr_name, ifa->ifa_label); 1178 1179 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; 1180 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = 1181 ifa->ifa_local; 1182 1183 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { 1184 done = -EFAULT; 1185 break; 1186 } 1187 buf += sizeof(struct ifreq); 1188 len -= sizeof(struct ifreq); 1189 done += sizeof(struct ifreq); 1190 } 1191 out: 1192 return done; 1193 } 1194 1195 static __be32 in_dev_select_addr(const struct in_device *in_dev, 1196 int scope) 1197 { 1198 for_primary_ifa(in_dev) { 1199 if (ifa->ifa_scope != RT_SCOPE_LINK && 1200 ifa->ifa_scope <= scope) 1201 return ifa->ifa_local; 1202 } endfor_ifa(in_dev); 1203 1204 return 0; 1205 } 1206 1207 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) 1208 { 1209 __be32 addr = 0; 1210 struct in_device *in_dev; 1211 struct net *net = dev_net(dev); 1212 int master_idx; 1213 1214 rcu_read_lock(); 1215 in_dev = __in_dev_get_rcu(dev); 1216 if (!in_dev) 1217 goto no_in_dev; 1218 1219 
/*
 * Per-interface worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two pieces of state:
 *   addr - a candidate local address (first entry whose ifa_local equals
 *          @local, or any entry when @local is 0, with ifa_scope <= @scope)
 *   same - whether some entry satisfied the @local / @dst constraints as
 *          tested by inet_ifa_match() (a zero @local or @dst is a wildcard)
 *
 * Returns the candidate address when the walk ends with "same" set,
 * otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
				 __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Pick the first acceptable candidate; once one is held it is
		 * only replaced by the explicit reassignment further down.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* An earlier entry already matched: we are done. */
			if (same)
				break;
		}
		if (!same) {
			/* Does this entry satisfy both wildcardable filters? */
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* With an explicit local address, or with no
				 * dst filter, the candidate needs no further
				 * checking.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	/* A candidate without a confirmed match is not reported. */
	return same ? addr : 0;
}
1357 */ 1358 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) 1359 { 1360 struct in_ifaddr *ifa; 1361 int named = 0; 1362 1363 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1364 char old[IFNAMSIZ], *dot; 1365 1366 memcpy(old, ifa->ifa_label, IFNAMSIZ); 1367 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1368 if (named++ == 0) 1369 goto skip; 1370 dot = strchr(old, ':'); 1371 if (!dot) { 1372 sprintf(old, ":%d", named); 1373 dot = old; 1374 } 1375 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) 1376 strcat(ifa->ifa_label, dot); 1377 else 1378 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 1379 skip: 1380 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 1381 } 1382 } 1383 1384 static bool inetdev_valid_mtu(unsigned int mtu) 1385 { 1386 return mtu >= 68; 1387 } 1388 1389 static void inetdev_send_gratuitous_arp(struct net_device *dev, 1390 struct in_device *in_dev) 1391 1392 { 1393 struct in_ifaddr *ifa; 1394 1395 for (ifa = in_dev->ifa_list; ifa; 1396 ifa = ifa->ifa_next) { 1397 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1398 ifa->ifa_local, dev, 1399 ifa->ifa_local, NULL, 1400 dev->dev_addr, NULL); 1401 } 1402 } 1403 1404 /* Called only under RTNL semaphore */ 1405 1406 static int inetdev_event(struct notifier_block *this, unsigned long event, 1407 void *ptr) 1408 { 1409 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1410 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1411 1412 ASSERT_RTNL(); 1413 1414 if (!in_dev) { 1415 if (event == NETDEV_REGISTER) { 1416 in_dev = inetdev_init(dev); 1417 if (IS_ERR(in_dev)) 1418 return notifier_from_errno(PTR_ERR(in_dev)); 1419 if (dev->flags & IFF_LOOPBACK) { 1420 IN_DEV_CONF_SET(in_dev, NOXFRM, 1); 1421 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); 1422 } 1423 } else if (event == NETDEV_CHANGEMTU) { 1424 /* Re-enabling IP */ 1425 if (inetdev_valid_mtu(dev->mtu)) 1426 in_dev = inetdev_init(dev); 1427 } 1428 goto out; 1429 } 1430 1431 switch (event) { 1432 case NETDEV_REGISTER: 1433 
pr_debug("%s: bug\n", __func__); 1434 RCU_INIT_POINTER(dev->ip_ptr, NULL); 1435 break; 1436 case NETDEV_UP: 1437 if (!inetdev_valid_mtu(dev->mtu)) 1438 break; 1439 if (dev->flags & IFF_LOOPBACK) { 1440 struct in_ifaddr *ifa = inet_alloc_ifa(); 1441 1442 if (ifa) { 1443 INIT_HLIST_NODE(&ifa->hash); 1444 ifa->ifa_local = 1445 ifa->ifa_address = htonl(INADDR_LOOPBACK); 1446 ifa->ifa_prefixlen = 8; 1447 ifa->ifa_mask = inet_make_mask(8); 1448 in_dev_hold(in_dev); 1449 ifa->ifa_dev = in_dev; 1450 ifa->ifa_scope = RT_SCOPE_HOST; 1451 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1452 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, 1453 INFINITY_LIFE_TIME); 1454 ipv4_devconf_setall(in_dev); 1455 neigh_parms_data_state_setall(in_dev->arp_parms); 1456 inet_insert_ifa(ifa); 1457 } 1458 } 1459 ip_mc_up(in_dev); 1460 /* fall through */ 1461 case NETDEV_CHANGEADDR: 1462 if (!IN_DEV_ARP_NOTIFY(in_dev)) 1463 break; 1464 /* fall through */ 1465 case NETDEV_NOTIFY_PEERS: 1466 /* Send gratuitous ARP to notify of link change */ 1467 inetdev_send_gratuitous_arp(dev, in_dev); 1468 break; 1469 case NETDEV_DOWN: 1470 ip_mc_down(in_dev); 1471 break; 1472 case NETDEV_PRE_TYPE_CHANGE: 1473 ip_mc_unmap(in_dev); 1474 break; 1475 case NETDEV_POST_TYPE_CHANGE: 1476 ip_mc_remap(in_dev); 1477 break; 1478 case NETDEV_CHANGEMTU: 1479 if (inetdev_valid_mtu(dev->mtu)) 1480 break; 1481 /* disable IP when MTU is not enough */ 1482 case NETDEV_UNREGISTER: 1483 inetdev_destroy(in_dev); 1484 break; 1485 case NETDEV_CHANGENAME: 1486 /* Do not notify about label change, this event is 1487 * not interesting to applications using netlink. 
1488 */ 1489 inetdev_changename(dev, in_dev); 1490 1491 devinet_sysctl_unregister(in_dev); 1492 devinet_sysctl_register(in_dev); 1493 break; 1494 } 1495 out: 1496 return NOTIFY_DONE; 1497 } 1498 1499 static struct notifier_block ip_netdev_notifier = { 1500 .notifier_call = inetdev_event, 1501 }; 1502 1503 static size_t inet_nlmsg_size(void) 1504 { 1505 return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) 1506 + nla_total_size(4) /* IFA_ADDRESS */ 1507 + nla_total_size(4) /* IFA_LOCAL */ 1508 + nla_total_size(4) /* IFA_BROADCAST */ 1509 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ 1510 + nla_total_size(4) /* IFA_FLAGS */ 1511 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ 1512 } 1513 1514 static inline u32 cstamp_delta(unsigned long cstamp) 1515 { 1516 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; 1517 } 1518 1519 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, 1520 unsigned long tstamp, u32 preferred, u32 valid) 1521 { 1522 struct ifa_cacheinfo ci; 1523 1524 ci.cstamp = cstamp_delta(cstamp); 1525 ci.tstamp = cstamp_delta(tstamp); 1526 ci.ifa_prefered = preferred; 1527 ci.ifa_valid = valid; 1528 1529 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); 1530 } 1531 1532 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, 1533 u32 portid, u32 seq, int event, unsigned int flags) 1534 { 1535 struct ifaddrmsg *ifm; 1536 struct nlmsghdr *nlh; 1537 u32 preferred, valid; 1538 1539 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); 1540 if (!nlh) 1541 return -EMSGSIZE; 1542 1543 ifm = nlmsg_data(nlh); 1544 ifm->ifa_family = AF_INET; 1545 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1546 ifm->ifa_flags = ifa->ifa_flags; 1547 ifm->ifa_scope = ifa->ifa_scope; 1548 ifm->ifa_index = ifa->ifa_dev->dev->ifindex; 1549 1550 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { 1551 preferred = ifa->ifa_preferred_lft; 1552 valid = ifa->ifa_valid_lft; 1553 if (preferred != INFINITY_LIFE_TIME) { 1554 long tval = (jiffies - 
/*
 * Netlink dump callback that emits one RTM_NEWADDR message per IPv4 address
 * in the namespace of the requesting socket.
 *
 * The dump is resumable. The position is kept in cb->args[]:
 *   args[0] - bucket index into net->dev_index_head
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	/* Pick up where the previous invocation stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited, hence the reset. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence value later handed to nl_dump_check_consistent(). */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped in an earlier pass. */
			if (idx < s_idx)
				goto cont;
			/* The saved address index is only meaningful for the
			 * exact device we stopped at.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/* A fill failure ends this pass; the current
				 * h/idx/ip_idx are saved below for the resume.
				 */
				if (inet_fill_ifaddr(skb, ifa,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWADDR, NLM_F_MULTI) < 0) {
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Record the resume position for the next call. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
inet_af_policy[IFLA_INET_MAX+1] = { 1708 [IFLA_INET_CONF] = { .type = NLA_NESTED }, 1709 }; 1710 1711 static int inet_validate_link_af(const struct net_device *dev, 1712 const struct nlattr *nla) 1713 { 1714 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1715 int err, rem; 1716 1717 if (dev && !__in_dev_get_rtnl(dev)) 1718 return -EAFNOSUPPORT; 1719 1720 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); 1721 if (err < 0) 1722 return err; 1723 1724 if (tb[IFLA_INET_CONF]) { 1725 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { 1726 int cfgid = nla_type(a); 1727 1728 if (nla_len(a) < 4) 1729 return -EINVAL; 1730 1731 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) 1732 return -EINVAL; 1733 } 1734 } 1735 1736 return 0; 1737 } 1738 1739 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) 1740 { 1741 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1742 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1743 int rem; 1744 1745 if (!in_dev) 1746 return -EAFNOSUPPORT; 1747 1748 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) 1749 BUG(); 1750 1751 if (tb[IFLA_INET_CONF]) { 1752 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) 1753 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); 1754 } 1755 1756 return 0; 1757 } 1758 1759 static int inet_netconf_msgsize_devconf(int type) 1760 { 1761 int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) 1762 + nla_total_size(4); /* NETCONFA_IFINDEX */ 1763 bool all = false; 1764 1765 if (type == NETCONFA_ALL) 1766 all = true; 1767 1768 if (all || type == NETCONFA_FORWARDING) 1769 size += nla_total_size(4); 1770 if (all || type == NETCONFA_RP_FILTER) 1771 size += nla_total_size(4); 1772 if (all || type == NETCONFA_MC_FORWARDING) 1773 size += nla_total_size(4); 1774 if (all || type == NETCONFA_PROXY_NEIGH) 1775 size += nla_total_size(4); 1776 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) 1777 size += nla_total_size(4); 1778 1779 return size; 1780 } 1781 1782 static int inet_netconf_fill_devconf(struct 
sk_buff *skb, int ifindex, 1783 struct ipv4_devconf *devconf, u32 portid, 1784 u32 seq, int event, unsigned int flags, 1785 int type) 1786 { 1787 struct nlmsghdr *nlh; 1788 struct netconfmsg *ncm; 1789 bool all = false; 1790 1791 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), 1792 flags); 1793 if (!nlh) 1794 return -EMSGSIZE; 1795 1796 if (type == NETCONFA_ALL) 1797 all = true; 1798 1799 ncm = nlmsg_data(nlh); 1800 ncm->ncm_family = AF_INET; 1801 1802 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) 1803 goto nla_put_failure; 1804 1805 if (!devconf) 1806 goto out; 1807 1808 if ((all || type == NETCONFA_FORWARDING) && 1809 nla_put_s32(skb, NETCONFA_FORWARDING, 1810 IPV4_DEVCONF(*devconf, FORWARDING)) < 0) 1811 goto nla_put_failure; 1812 if ((all || type == NETCONFA_RP_FILTER) && 1813 nla_put_s32(skb, NETCONFA_RP_FILTER, 1814 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) 1815 goto nla_put_failure; 1816 if ((all || type == NETCONFA_MC_FORWARDING) && 1817 nla_put_s32(skb, NETCONFA_MC_FORWARDING, 1818 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) 1819 goto nla_put_failure; 1820 if ((all || type == NETCONFA_PROXY_NEIGH) && 1821 nla_put_s32(skb, NETCONFA_PROXY_NEIGH, 1822 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) 1823 goto nla_put_failure; 1824 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && 1825 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 1826 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) 1827 goto nla_put_failure; 1828 1829 out: 1830 nlmsg_end(skb, nlh); 1831 return 0; 1832 1833 nla_put_failure: 1834 nlmsg_cancel(skb, nlh); 1835 return -EMSGSIZE; 1836 } 1837 1838 void inet_netconf_notify_devconf(struct net *net, int event, int type, 1839 int ifindex, struct ipv4_devconf *devconf) 1840 { 1841 struct sk_buff *skb; 1842 int err = -ENOBUFS; 1843 1844 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL); 1845 if (!skb) 1846 goto errout; 1847 1848 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, 1849 
event, 0, type); 1850 if (err < 0) { 1851 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ 1852 WARN_ON(err == -EMSGSIZE); 1853 kfree_skb(skb); 1854 goto errout; 1855 } 1856 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL); 1857 return; 1858 errout: 1859 if (err < 0) 1860 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); 1861 } 1862 1863 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { 1864 [NETCONFA_IFINDEX] = { .len = sizeof(int) }, 1865 [NETCONFA_FORWARDING] = { .len = sizeof(int) }, 1866 [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, 1867 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, 1868 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, 1869 }; 1870 1871 static int inet_netconf_get_devconf(struct sk_buff *in_skb, 1872 struct nlmsghdr *nlh) 1873 { 1874 struct net *net = sock_net(in_skb->sk); 1875 struct nlattr *tb[NETCONFA_MAX+1]; 1876 struct netconfmsg *ncm; 1877 struct sk_buff *skb; 1878 struct ipv4_devconf *devconf; 1879 struct in_device *in_dev; 1880 struct net_device *dev; 1881 int ifindex; 1882 int err; 1883 1884 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, 1885 devconf_ipv4_policy); 1886 if (err < 0) 1887 goto errout; 1888 1889 err = -EINVAL; 1890 if (!tb[NETCONFA_IFINDEX]) 1891 goto errout; 1892 1893 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); 1894 switch (ifindex) { 1895 case NETCONFA_IFINDEX_ALL: 1896 devconf = net->ipv4.devconf_all; 1897 break; 1898 case NETCONFA_IFINDEX_DEFAULT: 1899 devconf = net->ipv4.devconf_dflt; 1900 break; 1901 default: 1902 dev = __dev_get_by_index(net, ifindex); 1903 if (!dev) 1904 goto errout; 1905 in_dev = __in_dev_get_rtnl(dev); 1906 if (!in_dev) 1907 goto errout; 1908 devconf = &in_dev->cnf; 1909 break; 1910 } 1911 1912 err = -ENOBUFS; 1913 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); 1914 if (!skb) 1915 goto errout; 1916 1917 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 1918 NETLINK_CB(in_skb).portid, 
1919 nlh->nlmsg_seq, RTM_NEWNETCONF, 0, 1920 NETCONFA_ALL); 1921 if (err < 0) { 1922 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ 1923 WARN_ON(err == -EMSGSIZE); 1924 kfree_skb(skb); 1925 goto errout; 1926 } 1927 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 1928 errout: 1929 return err; 1930 } 1931 1932 static int inet_netconf_dump_devconf(struct sk_buff *skb, 1933 struct netlink_callback *cb) 1934 { 1935 struct net *net = sock_net(skb->sk); 1936 int h, s_h; 1937 int idx, s_idx; 1938 struct net_device *dev; 1939 struct in_device *in_dev; 1940 struct hlist_head *head; 1941 1942 s_h = cb->args[0]; 1943 s_idx = idx = cb->args[1]; 1944 1945 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1946 idx = 0; 1947 head = &net->dev_index_head[h]; 1948 rcu_read_lock(); 1949 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ 1950 net->dev_base_seq; 1951 hlist_for_each_entry_rcu(dev, head, index_hlist) { 1952 if (idx < s_idx) 1953 goto cont; 1954 in_dev = __in_dev_get_rcu(dev); 1955 if (!in_dev) 1956 goto cont; 1957 1958 if (inet_netconf_fill_devconf(skb, dev->ifindex, 1959 &in_dev->cnf, 1960 NETLINK_CB(cb->skb).portid, 1961 cb->nlh->nlmsg_seq, 1962 RTM_NEWNETCONF, 1963 NLM_F_MULTI, 1964 NETCONFA_ALL) < 0) { 1965 rcu_read_unlock(); 1966 goto done; 1967 } 1968 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1969 cont: 1970 idx++; 1971 } 1972 rcu_read_unlock(); 1973 } 1974 if (h == NETDEV_HASHENTRIES) { 1975 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, 1976 net->ipv4.devconf_all, 1977 NETLINK_CB(cb->skb).portid, 1978 cb->nlh->nlmsg_seq, 1979 RTM_NEWNETCONF, NLM_F_MULTI, 1980 NETCONFA_ALL) < 0) 1981 goto done; 1982 else 1983 h++; 1984 } 1985 if (h == NETDEV_HASHENTRIES + 1) { 1986 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, 1987 net->ipv4.devconf_dflt, 1988 NETLINK_CB(cb->skb).portid, 1989 cb->nlh->nlmsg_seq, 1990 RTM_NEWNETCONF, NLM_F_MULTI, 1991 NETCONFA_ALL) < 0) 1992 goto done; 1993 else 1994 h++; 1995 } 1996 done: 
1997 cb->args[0] = h; 1998 cb->args[1] = idx; 1999 2000 return skb->len; 2001 } 2002 2003 #ifdef CONFIG_SYSCTL 2004 2005 static void devinet_copy_dflt_conf(struct net *net, int i) 2006 { 2007 struct net_device *dev; 2008 2009 rcu_read_lock(); 2010 for_each_netdev_rcu(net, dev) { 2011 struct in_device *in_dev; 2012 2013 in_dev = __in_dev_get_rcu(dev); 2014 if (in_dev && !test_bit(i, in_dev->cnf.state)) 2015 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; 2016 } 2017 rcu_read_unlock(); 2018 } 2019 2020 /* called with RTNL locked */ 2021 static void inet_forward_change(struct net *net) 2022 { 2023 struct net_device *dev; 2024 int on = IPV4_DEVCONF_ALL(net, FORWARDING); 2025 2026 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 2027 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 2028 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2029 NETCONFA_FORWARDING, 2030 NETCONFA_IFINDEX_ALL, 2031 net->ipv4.devconf_all); 2032 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2033 NETCONFA_FORWARDING, 2034 NETCONFA_IFINDEX_DEFAULT, 2035 net->ipv4.devconf_dflt); 2036 2037 for_each_netdev(net, dev) { 2038 struct in_device *in_dev; 2039 2040 if (on) 2041 dev_disable_lro(dev); 2042 2043 in_dev = __in_dev_get_rtnl(dev); 2044 if (in_dev) { 2045 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 2046 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, 2047 NETCONFA_FORWARDING, 2048 dev->ifindex, &in_dev->cnf); 2049 } 2050 } 2051 } 2052 2053 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf) 2054 { 2055 if (cnf == net->ipv4.devconf_dflt) 2056 return NETCONFA_IFINDEX_DEFAULT; 2057 else if (cnf == net->ipv4.devconf_all) 2058 return NETCONFA_IFINDEX_ALL; 2059 else { 2060 struct in_device *idev 2061 = container_of(cnf, struct in_device, cnf); 2062 return idev->dev->ifindex; 2063 } 2064 } 2065 2066 static int devinet_conf_proc(struct ctl_table *ctl, int write, 2067 void __user *buffer, 2068 size_t *lenp, loff_t *ppos) 2069 { 2070 int old_value = *(int *)ctl->data; 2071 int ret = 
/*
 * sysctl handler for the "forwarding" entries (and for "ip_forward").
 *
 * The integer is read or written through proc_dointvec(). When a write
 * changes the value:
 *   - for the "default" entry, only a netconf notification is sent;
 *   - for the "all" entry, inet_forward_change() is called under RTNL;
 *   - for a per-device entry, LRO is disabled on the device if forwarding
 *     was switched on, and a netconf notification is sent, under RTNL.
 * In the latter two cases the routing cache is flushed after RTNL is
 * released.
 *
 * Returns the result of proc_dointvec(), or the result of restart_syscall()
 * when RTNL could not be taken.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	/* Snapshot value and file position so a restart can undo the write. */
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			/* Do not block on RTNL here: undo the write and have
			 * the syscall restarted instead.
			 */
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				/* Per-device entry: extra1 is the devconf
				 * embedded in the owning in_device.
				 */
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			/* "default" entry: notify only, no RTNL, no flush. */
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
"mc_forwarding"), 2205 2206 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), 2207 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), 2208 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), 2209 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), 2210 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), 2211 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, 2212 "accept_source_route"), 2213 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), 2214 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), 2215 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), 2216 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), 2217 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), 2218 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), 2219 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), 2220 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), 2221 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), 2222 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 2223 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 2224 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 2225 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), 2226 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION, 2227 "force_igmp_version"), 2228 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL, 2229 "igmpv2_unsolicited_report_interval"), 2230 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, 2231 "igmpv3_unsolicited_report_interval"), 2232 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, 2233 "ignore_routes_with_linkdown"), 2234 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP, 2235 "drop_gratuitous_arp"), 2236 2237 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 2238 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 2239 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 2240 "promote_secondaries"), 2241 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, 2242 "route_localnet"), 2243 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST, 2244 
"drop_unicast_in_l2_multicast"), 2245 }, 2246 }; 2247 2248 static int __devinet_sysctl_register(struct net *net, char *dev_name, 2249 int ifindex, struct ipv4_devconf *p) 2250 { 2251 int i; 2252 struct devinet_sysctl_table *t; 2253 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; 2254 2255 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); 2256 if (!t) 2257 goto out; 2258 2259 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { 2260 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; 2261 t->devinet_vars[i].extra1 = p; 2262 t->devinet_vars[i].extra2 = net; 2263 } 2264 2265 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name); 2266 2267 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars); 2268 if (!t->sysctl_header) 2269 goto free; 2270 2271 p->sysctl = t; 2272 2273 inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, 2274 ifindex, p); 2275 return 0; 2276 2277 free: 2278 kfree(t); 2279 out: 2280 return -ENOBUFS; 2281 } 2282 2283 static void __devinet_sysctl_unregister(struct net *net, 2284 struct ipv4_devconf *cnf, int ifindex) 2285 { 2286 struct devinet_sysctl_table *t = cnf->sysctl; 2287 2288 if (t) { 2289 cnf->sysctl = NULL; 2290 unregister_net_sysctl_table(t->sysctl_header); 2291 kfree(t); 2292 } 2293 2294 inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); 2295 } 2296 2297 static int devinet_sysctl_register(struct in_device *idev) 2298 { 2299 int err; 2300 2301 if (!sysctl_dev_name_is_allowed(idev->dev->name)) 2302 return -EINVAL; 2303 2304 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL); 2305 if (err) 2306 return err; 2307 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 2308 idev->dev->ifindex, &idev->cnf); 2309 if (err) 2310 neigh_sysctl_unregister(idev->arp_parms); 2311 return err; 2312 } 2313 2314 static void devinet_sysctl_unregister(struct in_device *idev) 2315 { 2316 struct net *net = dev_net(idev->dev); 2317 2318 __devinet_sysctl_unregister(net, &idev->cnf, 
/*
 * Per-namespace initialisation of the IPv4 device configuration.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt blocks (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry table) directly. Every
 * other namespace gets kmemdup()'d copies, with the forwarding sysctl entry
 * re-pointed at the copy. With CONFIG_SYSCTL the "all" and "default" conf
 * directories and the "net/ipv4" forwarding table are registered.
 *
 * Returns 0 on success. On failure everything set up so far is undone and
 * a negative errno is returned (-ENOMEM for allocation or forward-table
 * registration failures, otherwise the error from
 * __devinet_sysctl_register()).
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	/* Namespaces other than init_net work on private copies. */
	if (!net_eq(net, &init_net)) {
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (!all)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (!dflt)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (!tbl)
			goto err_alloc_ctl;

		/* Point the copied entry at this namespace's data. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Unwind in reverse order of setup; the pointer comparisons keep the
	 * static init_net objects from being passed to kfree().
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
inet_netconf_dump_devconf, NULL); 2461 } 2462