/*
 *	NET3	IP device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the IP parts of dev.c 1.0.19
 *		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	Changes:
 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
 *					lists.
 *		Cyrus Durgin:		updated for kmod
 *		Matthias Andree:	in devinet_ioctl, compare label and
 *					address (4.4BSD alias style support),
 *					fall back to comparing just the label
 *					if no match found.
 */


#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/slab.h>
#include <linux/hash.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/kmod.h>

#include <net/arp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>

#include "fib_lookup.h"

/*
 * Devconf template with the redirect and shared-media knobs preset to 1.
 * Entries of .data are indexed by IPV4_DEVCONF_* - 1.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};

/*
 * Same presets as ipv4_devconf, plus ACCEPT_SOURCE_ROUTE preset to 1.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};

/* Read devconf attribute @attr from the namespace's devconf_dflt block. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)

/* Netlink attribute policy used when parsing IFA_* attributes below. */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};

#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash table of in_ifaddr entries keyed by (namespace, local address).
 * Insertions and removals take inet_addr_hash_lock; __ip_dev_find() walks
 * a bucket under rcu_read_lock() only.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);

/* Bucket index for @addr in @net: the address is mixed with net_hash_mix(). */
static u32 inet_addr_hash(struct net *net, __be32 addr)
{
	u32 val = (__force u32) addr ^ net_hash_mix(net);

	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
}

/* Link @ifa into the bucket selected by its ifa_local address. */
static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
{
	u32 hash = inet_addr_hash(net, ifa->ifa_local);

	spin_lock(&inet_addr_hash_lock);
	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
	spin_unlock(&inet_addr_hash_lock);
}

/* Unlink @ifa from the address hash table. */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}

/**
 * __ip_dev_find - find the first device with a given source address.
 * @net: the net namespace
 * @addr: the source address
 * @devref: if true, take a reference on the found device
 *
 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 *
 * The address hash table is searched first; if nothing matches there, the
 * RT_TABLE_LOCAL FIB table is consulted and an RTN_LOCAL result supplies the
 * device. Returns NULL when neither lookup finds a device.
 */
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
	u32 hash = inet_addr_hash(net, addr);
	struct net_device *result = NULL;
	struct in_ifaddr *ifa;
	struct hlist_node *node;

	rcu_read_lock();
	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
		if (ifa->ifa_local == addr) {
			struct net_device *dev = ifa->ifa_dev->dev;

			/* the table is shared: skip hits from other namespaces */
			if (!net_eq(dev_net(dev), net))
				continue;
			result = dev;
			break;
		}
	}
	if (!result) {
		struct flowi4 fl4 = { .daddr = addr };
		struct fib_result res = { 0 };
		struct fib_table *local;

		/* Fallback to FIB local table so that communication
		 * over loopback subnets work.
		 */
		local = fib_get_table(net, RT_TABLE_LOCAL);
		if (local &&
		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
		    res.type == RTN_LOCAL)
			result = FIB_RES_DEV(res);
	}
	/* take the reference while still inside the RCU read section */
	if (result && devref)
		dev_hold(result);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(__ip_dev_find);

static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain called on address add (NETDEV_UP) and delete (NETDEV_DOWN). */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks are no-ops. */
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif

/* Locks all the inet devices.
*/

/* Allocate a zeroed in_ifaddr; returns NULL if the allocation fails. */
static struct in_ifaddr *inet_alloc_ifa(void)
{
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}

/*
 * RCU callback for inet_free_ifa(): drop the in_device reference held via
 * ifa->ifa_dev (if any) and free the address.
 */
static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}

/* Free @ifa through call_rcu() rather than immediately. */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}

/*
 * Final teardown of an in_device: warn if address or multicast lists are
 * still populated, drop the net_device reference, and free @idev only if it
 * was marked dead (otherwise log an error and leave it allocated).
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
	dev_put(dev);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);

/*
 * Allocate and attach an in_device to @dev. Must be called under RTNL.
 * The configuration is copied from the namespace's devconf_dflt.
 * Returns the new in_device, or NULL if either allocation fails.
 */
static struct in_device *inetdev_init(struct net_device *dev)
{
	struct in_device *in_dev;

	ASSERT_RTNL();

	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
			sizeof(in_dev->cnf));
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
		goto out_kfree;
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
	/* Reference in_dev->dev */
	dev_hold(dev);
	/* Account for reference dev->ip_ptr (below) */
	in_dev_hold(in_dev);

	devinet_sysctl_register(in_dev);
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);

	/* we can receive as soon as ip_ptr is set -- do this last */
	rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
	return in_dev;
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}

/* RCU callback for inetdev_destroy(): drop one in_device reference. */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}

/*
 * Detach @in_dev from its net_device. Must be called under RTNL.
 * Marks the in_device dead, deletes every address, clears dev->ip_ptr and
 * defers the final in_dev_put() through call_rcu().
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	/* always delete the list head; destroy=0, so free it here */
	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}

/*
 * Return 1 if some primary address of @in_dev passes inet_ifa_match() for
 * @a and, when @b is non-zero, for @b as well; otherwise return 0.
 */
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
	rcu_read_lock();
	for_primary_ifa(in_dev) {
		if (inet_ifa_match(a, ifa)) {
			if (!b || inet_ifa_match(b, ifa)) {
				rcu_read_unlock();
				return 1;
			}
		}
	} endfor_ifa(in_dev);
	rcu_read_unlock();
	return 0;
}

/*
 * Remove the address *@ifap from @in_dev. Must be called under RTNL.
 * @destroy: if non-zero the removed address is freed here; otherwise the
 *           caller keeps ownership of it.
 * @nlh, @portid: passed through to rtmsg_ifa() for the netlink notifications.
 *
 * When a primary address is removed, its matching secondaries are either
 * deleted as well or, if IN_DEV_PROMOTE_SECONDARIES() is set, the first one
 * is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* remember where a promoted address will be re-linked */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* skip entries that are not secondaries of ifa1 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* move the promoted address right after the last primary */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* re-add routes of the remaining secondaries of this subnet */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}

/* __inet_del_ifa() without a netlink request context. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}

/*
 * Insert @ifa into the address list of ifa->ifa_dev. Must be called under
 * RTNL. The address becomes secondary when an existing entry with the same
 * mask matches it.
 *
 * Returns 0 on success, -EEXIST if the same local address is already
 * present, -EINVAL on a scope mismatch with the matching entry. On either
 * error, and when ifa_local is zero (which still returns 0), @ifa is freed.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	/* on exit ifap points at the list tail, where secondaries are added */
	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		net_srandom(ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}

/* __inet_insert_ifa() without a netlink request context. */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}

/*
 * Bind @ifa to @dev's in_device (taking a reference on it if not already
 * bound) and insert it. Must be called under RTNL.
 * Returns -ENOBUFS (after freeing @ifa) if @dev has no in_device, otherwise
 * the result of inet_insert_ifa().
 */
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		inet_free_ifa(ifa);
		return -ENOBUFS;
	}
	ipv4_devconf_setall(in_dev);
	if (ifa->ifa_dev != in_dev) {
		WARN_ON(ifa->ifa_dev);
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
	/* loopback addresses are forced to host scope */
	if (ipv4_is_loopback(ifa->ifa_local))
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}

/* Caller must hold RCU or RTNL :
 * We don't take a reference on found in_device
 *
 * Returns the in_device of the interface with index @ifindex in @net, or
 * NULL if there is no such device.
 */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	rcu_read_unlock();
	return in_dev;
}
EXPORT_SYMBOL(inetdev_by_index);

/* Called only from RTNL semaphored context. No locks.
*/

/*
 * Return the first primary address of @in_dev whose mask equals @mask and
 * which passes inet_ifa_match() for @prefix, or NULL if there is none.
 */
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
				    __be32 mask)
{
	ASSERT_RTNL();

	for_primary_ifa(in_dev) {
		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
			return ifa;
	} endfor_ifa(in_dev);
	return NULL;
}

/*
 * Netlink address-delete handler. Deletes the first address on the
 * interface named by ifa_index that matches every attribute supplied
 * (IFA_LOCAL, IFA_LABEL, IFA_ADDRESS together with the prefix length).
 *
 * Returns 0 on success, the nlmsg_parse() error, -ENODEV if the interface
 * has no in_device, or -EADDRNOTAVAIL if no address matches.
 */
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFA_MAX+1];
	struct in_device *in_dev;
	struct ifaddrmsg *ifm;
	struct in_ifaddr *ifa, **ifap;
	int err = -EINVAL;

	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	in_dev = inetdev_by_index(net, ifm->ifa_index);
	if (in_dev == NULL) {
		err = -ENODEV;
		goto errout;
	}

	/* an absent attribute acts as a wildcard for that criterion */
	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
	     ifap = &ifa->ifa_next) {
		if (tb[IFA_LOCAL] &&
		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
			continue;

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
			continue;

		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
		return 0;
	}

	err = -EADDRNOTAVAIL;
errout:
	return err;
}

/*
 * Build a new in_ifaddr from a netlink address message. The returned
 * address holds a reference on the interface's in_device.
 *
 * Returns the new address, or an ERR_PTR(): the nlmsg_parse() error,
 * -EINVAL (prefix length above 32 or IFA_LOCAL missing), -ENODEV (no such
 * interface) or -ENOBUFS (no in_device, or allocation failure).
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (dev == NULL)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (in_dev == NULL)
		goto errout;

	ifa = inet_alloc_ifa();
	if (ifa == NULL)
		/*
		 * A potential indev allocation can be left alive, it stays
		 * assigned to its device and is destroyed with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	in_dev_hold(in_dev);

	/* without IFA_ADDRESS, the address defaults to IFA_LOCAL */
	if (tb[IFA_ADDRESS] == NULL)
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	ifa->ifa_flags = ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);

	/* the label defaults to the device name */
	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	return ifa;

errout:
	return ERR_PTR(err);
}

/*
 * Netlink address-add handler: build the address with rtm_to_ifaddr() and
 * insert it. Returns the error from either step, or 0.
 */
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr *ifa;

	ASSERT_RTNL();

	ifa = rtm_to_ifaddr(net, nlh);
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
}

/*
 *	Determine a default network mask, based on the IP address.
 */

static int inet_abc_len(__be32 addr)
{
	int rc = -1;	/* Something else, probably a multicast.
*/ 662 663 if (ipv4_is_zeronet(addr)) 664 rc = 0; 665 else { 666 __u32 haddr = ntohl(addr); 667 668 if (IN_CLASSA(haddr)) 669 rc = 8; 670 else if (IN_CLASSB(haddr)) 671 rc = 16; 672 else if (IN_CLASSC(haddr)) 673 rc = 24; 674 } 675 676 return rc; 677 } 678 679 680 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) 681 { 682 struct ifreq ifr; 683 struct sockaddr_in sin_orig; 684 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; 685 struct in_device *in_dev; 686 struct in_ifaddr **ifap = NULL; 687 struct in_ifaddr *ifa = NULL; 688 struct net_device *dev; 689 char *colon; 690 int ret = -EFAULT; 691 int tryaddrmatch = 0; 692 693 /* 694 * Fetch the caller's info block into kernel space 695 */ 696 697 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 698 goto out; 699 ifr.ifr_name[IFNAMSIZ - 1] = 0; 700 701 /* save original address for comparison */ 702 memcpy(&sin_orig, sin, sizeof(*sin)); 703 704 colon = strchr(ifr.ifr_name, ':'); 705 if (colon) 706 *colon = 0; 707 708 dev_load(net, ifr.ifr_name); 709 710 switch (cmd) { 711 case SIOCGIFADDR: /* Get interface address */ 712 case SIOCGIFBRDADDR: /* Get the broadcast address */ 713 case SIOCGIFDSTADDR: /* Get the destination address */ 714 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 715 /* Note that these ioctls will not sleep, 716 so that we do not impose a lock. 
717 One day we will be forced to put shlock here (I mean SMP) 718 */ 719 tryaddrmatch = (sin_orig.sin_family == AF_INET); 720 memset(sin, 0, sizeof(*sin)); 721 sin->sin_family = AF_INET; 722 break; 723 724 case SIOCSIFFLAGS: 725 ret = -EPERM; 726 if (!capable(CAP_NET_ADMIN)) 727 goto out; 728 break; 729 case SIOCSIFADDR: /* Set interface address (and family) */ 730 case SIOCSIFBRDADDR: /* Set the broadcast address */ 731 case SIOCSIFDSTADDR: /* Set the destination address */ 732 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 733 ret = -EPERM; 734 if (!capable(CAP_NET_ADMIN)) 735 goto out; 736 ret = -EINVAL; 737 if (sin->sin_family != AF_INET) 738 goto out; 739 break; 740 default: 741 ret = -EINVAL; 742 goto out; 743 } 744 745 rtnl_lock(); 746 747 ret = -ENODEV; 748 dev = __dev_get_by_name(net, ifr.ifr_name); 749 if (!dev) 750 goto done; 751 752 if (colon) 753 *colon = ':'; 754 755 in_dev = __in_dev_get_rtnl(dev); 756 if (in_dev) { 757 if (tryaddrmatch) { 758 /* Matthias Andree */ 759 /* compare label and address (4.4BSD style) */ 760 /* note: we only do this for a limited set of ioctls 761 and only if the original address family was AF_INET. 762 This is checked above. 
*/ 763 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 764 ifap = &ifa->ifa_next) { 765 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 766 sin_orig.sin_addr.s_addr == 767 ifa->ifa_local) { 768 break; /* found */ 769 } 770 } 771 } 772 /* we didn't get a match, maybe the application is 773 4.3BSD-style and passed in junk so we fall back to 774 comparing just the label */ 775 if (!ifa) { 776 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 777 ifap = &ifa->ifa_next) 778 if (!strcmp(ifr.ifr_name, ifa->ifa_label)) 779 break; 780 } 781 } 782 783 ret = -EADDRNOTAVAIL; 784 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) 785 goto done; 786 787 switch (cmd) { 788 case SIOCGIFADDR: /* Get interface address */ 789 sin->sin_addr.s_addr = ifa->ifa_local; 790 goto rarok; 791 792 case SIOCGIFBRDADDR: /* Get the broadcast address */ 793 sin->sin_addr.s_addr = ifa->ifa_broadcast; 794 goto rarok; 795 796 case SIOCGIFDSTADDR: /* Get the destination address */ 797 sin->sin_addr.s_addr = ifa->ifa_address; 798 goto rarok; 799 800 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 801 sin->sin_addr.s_addr = ifa->ifa_mask; 802 goto rarok; 803 804 case SIOCSIFFLAGS: 805 if (colon) { 806 ret = -EADDRNOTAVAIL; 807 if (!ifa) 808 break; 809 ret = 0; 810 if (!(ifr.ifr_flags & IFF_UP)) 811 inet_del_ifa(in_dev, ifap, 1); 812 break; 813 } 814 ret = dev_change_flags(dev, ifr.ifr_flags); 815 break; 816 817 case SIOCSIFADDR: /* Set interface address (and family) */ 818 ret = -EINVAL; 819 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 820 break; 821 822 if (!ifa) { 823 ret = -ENOBUFS; 824 ifa = inet_alloc_ifa(); 825 INIT_HLIST_NODE(&ifa->hash); 826 if (!ifa) 827 break; 828 if (colon) 829 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); 830 else 831 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 832 } else { 833 ret = 0; 834 if (ifa->ifa_local == sin->sin_addr.s_addr) 835 break; 836 inet_del_ifa(in_dev, ifap, 0); 837 ifa->ifa_broadcast = 0; 838 ifa->ifa_scope = 0; 839 } 840 841 
ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; 842 843 if (!(dev->flags & IFF_POINTOPOINT)) { 844 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); 845 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); 846 if ((dev->flags & IFF_BROADCAST) && 847 ifa->ifa_prefixlen < 31) 848 ifa->ifa_broadcast = ifa->ifa_address | 849 ~ifa->ifa_mask; 850 } else { 851 ifa->ifa_prefixlen = 32; 852 ifa->ifa_mask = inet_make_mask(32); 853 } 854 ret = inet_set_ifa(dev, ifa); 855 break; 856 857 case SIOCSIFBRDADDR: /* Set the broadcast address */ 858 ret = 0; 859 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { 860 inet_del_ifa(in_dev, ifap, 0); 861 ifa->ifa_broadcast = sin->sin_addr.s_addr; 862 inet_insert_ifa(ifa); 863 } 864 break; 865 866 case SIOCSIFDSTADDR: /* Set the destination address */ 867 ret = 0; 868 if (ifa->ifa_address == sin->sin_addr.s_addr) 869 break; 870 ret = -EINVAL; 871 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 872 break; 873 ret = 0; 874 inet_del_ifa(in_dev, ifap, 0); 875 ifa->ifa_address = sin->sin_addr.s_addr; 876 inet_insert_ifa(ifa); 877 break; 878 879 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 880 881 /* 882 * The mask we set must be legal. 883 */ 884 ret = -EINVAL; 885 if (bad_mask(sin->sin_addr.s_addr, 0)) 886 break; 887 ret = 0; 888 if (ifa->ifa_mask != sin->sin_addr.s_addr) { 889 __be32 old_mask = ifa->ifa_mask; 890 inet_del_ifa(in_dev, ifap, 0); 891 ifa->ifa_mask = sin->sin_addr.s_addr; 892 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); 893 894 /* See if current broadcast address matches 895 * with current netmask, then recalculate 896 * the broadcast address. Otherwise it's a 897 * funny address, so don't touch it since 898 * the user seems to know what (s)he's doing... 
899 */ 900 if ((dev->flags & IFF_BROADCAST) && 901 (ifa->ifa_prefixlen < 31) && 902 (ifa->ifa_broadcast == 903 (ifa->ifa_local|~old_mask))) { 904 ifa->ifa_broadcast = (ifa->ifa_local | 905 ~sin->sin_addr.s_addr); 906 } 907 inet_insert_ifa(ifa); 908 } 909 break; 910 } 911 done: 912 rtnl_unlock(); 913 out: 914 return ret; 915 rarok: 916 rtnl_unlock(); 917 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0; 918 goto out; 919 } 920 921 static int inet_gifconf(struct net_device *dev, char __user *buf, int len) 922 { 923 struct in_device *in_dev = __in_dev_get_rtnl(dev); 924 struct in_ifaddr *ifa; 925 struct ifreq ifr; 926 int done = 0; 927 928 if (!in_dev) 929 goto out; 930 931 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 932 if (!buf) { 933 done += sizeof(ifr); 934 continue; 935 } 936 if (len < (int) sizeof(ifr)) 937 break; 938 memset(&ifr, 0, sizeof(struct ifreq)); 939 if (ifa->ifa_label) 940 strcpy(ifr.ifr_name, ifa->ifa_label); 941 else 942 strcpy(ifr.ifr_name, dev->name); 943 944 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; 945 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = 946 ifa->ifa_local; 947 948 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { 949 done = -EFAULT; 950 break; 951 } 952 buf += sizeof(struct ifreq); 953 len -= sizeof(struct ifreq); 954 done += sizeof(struct ifreq); 955 } 956 out: 957 return done; 958 } 959 960 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) 961 { 962 __be32 addr = 0; 963 struct in_device *in_dev; 964 struct net *net = dev_net(dev); 965 966 rcu_read_lock(); 967 in_dev = __in_dev_get_rcu(dev); 968 if (!in_dev) 969 goto no_in_dev; 970 971 for_primary_ifa(in_dev) { 972 if (ifa->ifa_scope > scope) 973 continue; 974 if (!dst || inet_ifa_match(dst, ifa)) { 975 addr = ifa->ifa_local; 976 break; 977 } 978 if (!addr) 979 addr = ifa->ifa_local; 980 } endfor_ifa(in_dev); 981 982 if (addr) 983 goto out_unlock; 984 no_in_dev: 985 986 /* Not loopback addresses 
on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		/* first primary address that is not link scope and fits @scope */
		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope != RT_SCOPE_LINK &&
			    ifa->ifa_scope <= scope) {
				addr = ifa->ifa_local;
				goto out_unlock;
			}
		} endfor_ifa(in_dev);
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);

/*
 * Per-device worker for inet_confirm_addr(): walk every address of @in_dev
 * and return a local address satisfying the @dst / @local / @scope
 * wildcards described at inet_confirm_addr(), or 0 if none does.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* candidate: requested (or any) local address within scope */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}

/*
 * Confirm that local IP address exists using wildcards:
 * - in_dev: only on this interface, 0=any interface
 * - dst: only in the same subnet as dst, 0=any dst
 * - local: address, 0=autoselect the local address
 * - scope: maximum allowed scope value for the local address
 *
 * For any scope other than RT_SCOPE_LINK only @in_dev is checked; for
 * RT_SCOPE_LINK every device in @in_dev's namespace is tried in turn.
 * Returns the confirmed address, or 0 if there is none.
 */
__be32 inet_confirm_addr(struct in_device *in_dev,
			 __be32 dst, __be32 local, int scope)
{
	__be32 addr = 0;
	struct net_device *dev;
	struct net *net;

	if (scope != RT_SCOPE_LINK)
		return confirm_addr_indev(in_dev, dst, local, scope);

	net = dev_net(in_dev->dev);
	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev) {
			addr = confirm_addr_indev(in_dev, dst, local, scope);
			if (addr)
				break;
		}
	}
	rcu_read_unlock();

	return addr;
}
EXPORT_SYMBOL(inet_confirm_addr);

/*
 *	Device notifier
 */

/* Add @nb to the inetaddr notifier chain. */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);

/* Remove @nb from the inetaddr notifier chain. */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);

/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
*/
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		/* the first address gets the bare device name */
		if (named++ == 0)
			goto skip;
		/* keep the old ":suffix", or synthesise ":<n>" if there was none */
		dot = strchr(old, ':');
		if (dot == NULL) {
			sprintf(old, ":%d", named);
			dot = old;
		}
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			/* too long: overwrite the tail of the name with the suffix */
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}

/* An MTU below 68 is treated as too small for IPv4 on the device. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	return mtu >= 68;
}

/*
 * Send one ARP request per address of @in_dev in which both IP address
 * arguments passed to arp_send() are the address itself.
 */
static void inetdev_send_gratuitous_arp(struct net_device *dev,
					struct in_device *in_dev)

{
	struct in_ifaddr *ifa;

	for (ifa = in_dev->ifa_list; ifa;
	     ifa = ifa->ifa_next) {
		arp_send(ARPOP_REQUEST, ETH_P_ARP,
			 ifa->ifa_local, dev,
			 ifa->ifa_local, NULL,
			 dev->dev_addr, NULL);
	}
}

/* Called only under RTNL semaphore */

/*
 * Netdevice notifier: creates, updates and destroys the in_device attached
 * to a net_device in response to device events. Returns NOTIFY_DONE, or an
 * errno-encoded notifier value if inetdev_init() fails on NETDEV_REGISTER.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* an in_device already exists at registration time */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* a loopback device coming up gets INADDR_LOOPBACK/8 */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* re-register the per-device sysctls after the rename */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}

static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};

/* Upper bound on the size of one address message built by inet_fill_ifaddr(). */
static size_t inet_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
	       + nla_total_size(4) /* IFA_ADDRESS */
	       + nla_total_size(4) /* IFA_LOCAL */
	       + nla_total_size(4) /* IFA_BROADCAST */
	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
}

/*
 * Append a netlink message describing @ifa to @skb. Attributes whose value
 * is zero (or whose label is empty) are omitted. IFA_F_PERMANENT is always
 * set in the reported flags.
 *
 * Returns the nlmsg_end() result on success, or -EMSGSIZE if the header or
 * an attribute does not fit (the partial message is cancelled).
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if ((ifa->ifa_address &&
	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;
	struct hlist_node *node;
1293 1294 s_h = cb->args[0]; 1295 s_idx = idx = cb->args[1]; 1296 s_ip_idx = ip_idx = cb->args[2]; 1297 1298 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1299 idx = 0; 1300 head = &net->dev_index_head[h]; 1301 rcu_read_lock(); 1302 hlist_for_each_entry_rcu(dev, node, head, index_hlist) { 1303 if (idx < s_idx) 1304 goto cont; 1305 if (h > s_h || idx > s_idx) 1306 s_ip_idx = 0; 1307 in_dev = __in_dev_get_rcu(dev); 1308 if (!in_dev) 1309 goto cont; 1310 1311 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; 1312 ifa = ifa->ifa_next, ip_idx++) { 1313 if (ip_idx < s_ip_idx) 1314 continue; 1315 if (inet_fill_ifaddr(skb, ifa, 1316 NETLINK_CB(cb->skb).portid, 1317 cb->nlh->nlmsg_seq, 1318 RTM_NEWADDR, NLM_F_MULTI) <= 0) { 1319 rcu_read_unlock(); 1320 goto done; 1321 } 1322 } 1323 cont: 1324 idx++; 1325 } 1326 rcu_read_unlock(); 1327 } 1328 1329 done: 1330 cb->args[0] = h; 1331 cb->args[1] = idx; 1332 cb->args[2] = ip_idx; 1333 1334 return skb->len; 1335 } 1336 1337 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, 1338 u32 portid) 1339 { 1340 struct sk_buff *skb; 1341 u32 seq = nlh ? 
nlh->nlmsg_seq : 0; 1342 int err = -ENOBUFS; 1343 struct net *net; 1344 1345 net = dev_net(ifa->ifa_dev->dev); 1346 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); 1347 if (skb == NULL) 1348 goto errout; 1349 1350 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); 1351 if (err < 0) { 1352 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ 1353 WARN_ON(err == -EMSGSIZE); 1354 kfree_skb(skb); 1355 goto errout; 1356 } 1357 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1358 return; 1359 errout: 1360 if (err < 0) 1361 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); 1362 } 1363 1364 static size_t inet_get_link_af_size(const struct net_device *dev) 1365 { 1366 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); 1367 1368 if (!in_dev) 1369 return 0; 1370 1371 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ 1372 } 1373 1374 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) 1375 { 1376 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); 1377 struct nlattr *nla; 1378 int i; 1379 1380 if (!in_dev) 1381 return -ENODATA; 1382 1383 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); 1384 if (nla == NULL) 1385 return -EMSGSIZE; 1386 1387 for (i = 0; i < IPV4_DEVCONF_MAX; i++) 1388 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; 1389 1390 return 0; 1391 } 1392 1393 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { 1394 [IFLA_INET_CONF] = { .type = NLA_NESTED }, 1395 }; 1396 1397 static int inet_validate_link_af(const struct net_device *dev, 1398 const struct nlattr *nla) 1399 { 1400 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1401 int err, rem; 1402 1403 if (dev && !__in_dev_get_rtnl(dev)) 1404 return -EAFNOSUPPORT; 1405 1406 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); 1407 if (err < 0) 1408 return err; 1409 1410 if (tb[IFLA_INET_CONF]) { 1411 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { 1412 int cfgid = nla_type(a); 1413 1414 if 
(nla_len(a) < 4) 1415 return -EINVAL; 1416 1417 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) 1418 return -EINVAL; 1419 } 1420 } 1421 1422 return 0; 1423 } 1424 1425 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) 1426 { 1427 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1428 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1429 int rem; 1430 1431 if (!in_dev) 1432 return -EAFNOSUPPORT; 1433 1434 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) 1435 BUG(); 1436 1437 if (tb[IFLA_INET_CONF]) { 1438 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) 1439 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); 1440 } 1441 1442 return 0; 1443 } 1444 1445 #ifdef CONFIG_SYSCTL 1446 1447 static void devinet_copy_dflt_conf(struct net *net, int i) 1448 { 1449 struct net_device *dev; 1450 1451 rcu_read_lock(); 1452 for_each_netdev_rcu(net, dev) { 1453 struct in_device *in_dev; 1454 1455 in_dev = __in_dev_get_rcu(dev); 1456 if (in_dev && !test_bit(i, in_dev->cnf.state)) 1457 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; 1458 } 1459 rcu_read_unlock(); 1460 } 1461 1462 /* called with RTNL locked */ 1463 static void inet_forward_change(struct net *net) 1464 { 1465 struct net_device *dev; 1466 int on = IPV4_DEVCONF_ALL(net, FORWARDING); 1467 1468 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 1469 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 1470 1471 for_each_netdev(net, dev) { 1472 struct in_device *in_dev; 1473 if (on) 1474 dev_disable_lro(dev); 1475 rcu_read_lock(); 1476 in_dev = __in_dev_get_rcu(dev); 1477 if (in_dev) 1478 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 1479 rcu_read_unlock(); 1480 } 1481 } 1482 1483 static int devinet_conf_proc(ctl_table *ctl, int write, 1484 void __user *buffer, 1485 size_t *lenp, loff_t *ppos) 1486 { 1487 int old_value = *(int *)ctl->data; 1488 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1489 int new_value = *(int *)ctl->data; 1490 1491 if (write) { 1492 struct ipv4_devconf *cnf = ctl->extra1; 1493 
struct net *net = ctl->extra2; 1494 int i = (int *)ctl->data - cnf->data; 1495 1496 set_bit(i, cnf->state); 1497 1498 if (cnf == net->ipv4.devconf_dflt) 1499 devinet_copy_dflt_conf(net, i); 1500 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || 1501 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) 1502 if ((new_value == 0) && (old_value != 0)) 1503 rt_cache_flush(net); 1504 } 1505 1506 return ret; 1507 } 1508 1509 static int devinet_sysctl_forward(ctl_table *ctl, int write, 1510 void __user *buffer, 1511 size_t *lenp, loff_t *ppos) 1512 { 1513 int *valp = ctl->data; 1514 int val = *valp; 1515 loff_t pos = *ppos; 1516 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1517 1518 if (write && *valp != val) { 1519 struct net *net = ctl->extra2; 1520 1521 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { 1522 if (!rtnl_trylock()) { 1523 /* Restore the original values before restarting */ 1524 *valp = val; 1525 *ppos = pos; 1526 return restart_syscall(); 1527 } 1528 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 1529 inet_forward_change(net); 1530 } else if (*valp) { 1531 struct ipv4_devconf *cnf = ctl->extra1; 1532 struct in_device *idev = 1533 container_of(cnf, struct in_device, cnf); 1534 dev_disable_lro(idev->dev); 1535 } 1536 rtnl_unlock(); 1537 rt_cache_flush(net); 1538 } 1539 } 1540 1541 return ret; 1542 } 1543 1544 static int ipv4_doint_and_flush(ctl_table *ctl, int write, 1545 void __user *buffer, 1546 size_t *lenp, loff_t *ppos) 1547 { 1548 int *valp = ctl->data; 1549 int val = *valp; 1550 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1551 struct net *net = ctl->extra2; 1552 1553 if (write && *valp != val) 1554 rt_cache_flush(net); 1555 1556 return ret; 1557 } 1558 1559 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ 1560 { \ 1561 .procname = name, \ 1562 .data = ipv4_devconf.data + \ 1563 IPV4_DEVCONF_ ## attr - 1, \ 1564 .maxlen = sizeof(int), \ 1565 .mode = mval, \ 1566 .proc_handler = proc, \ 1567 .extra1 = &ipv4_devconf, \ 1568 } 1569 1570 #define 
DEVINET_SYSCTL_RW_ENTRY(attr, name) \ 1571 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) 1572 1573 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ 1574 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) 1575 1576 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ 1577 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) 1578 1579 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ 1580 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) 1581 1582 static struct devinet_sysctl_table { 1583 struct ctl_table_header *sysctl_header; 1584 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; 1585 } devinet_sysctl = { 1586 .devinet_vars = { 1587 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", 1588 devinet_sysctl_forward), 1589 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), 1590 1591 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), 1592 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), 1593 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), 1594 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), 1595 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), 1596 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, 1597 "accept_source_route"), 1598 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), 1599 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), 1600 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), 1601 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), 1602 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), 1603 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), 1604 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), 1605 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), 1606 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), 1607 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 1608 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 1609 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 1610 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), 1611 1612 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, 
"disable_xfrm"), 1613 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 1614 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION, 1615 "force_igmp_version"), 1616 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 1617 "promote_secondaries"), 1618 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, 1619 "route_localnet"), 1620 }, 1621 }; 1622 1623 static int __devinet_sysctl_register(struct net *net, char *dev_name, 1624 struct ipv4_devconf *p) 1625 { 1626 int i; 1627 struct devinet_sysctl_table *t; 1628 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ]; 1629 1630 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); 1631 if (!t) 1632 goto out; 1633 1634 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { 1635 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; 1636 t->devinet_vars[i].extra1 = p; 1637 t->devinet_vars[i].extra2 = net; 1638 } 1639 1640 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name); 1641 1642 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars); 1643 if (!t->sysctl_header) 1644 goto free; 1645 1646 p->sysctl = t; 1647 return 0; 1648 1649 free: 1650 kfree(t); 1651 out: 1652 return -ENOBUFS; 1653 } 1654 1655 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) 1656 { 1657 struct devinet_sysctl_table *t = cnf->sysctl; 1658 1659 if (t == NULL) 1660 return; 1661 1662 cnf->sysctl = NULL; 1663 unregister_net_sysctl_table(t->sysctl_header); 1664 kfree(t); 1665 } 1666 1667 static void devinet_sysctl_register(struct in_device *idev) 1668 { 1669 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); 1670 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 1671 &idev->cnf); 1672 } 1673 1674 static void devinet_sysctl_unregister(struct in_device *idev) 1675 { 1676 __devinet_sysctl_unregister(&idev->cnf); 1677 neigh_sysctl_unregister(idev->arp_parms); 1678 } 1679 1680 static struct ctl_table ctl_forward_entry[] = { 1681 { 1682 .procname = "ip_forward", 1683 .data = &ipv4_devconf.data[ 1684 
IPV4_DEVCONF_FORWARDING - 1], 1685 .maxlen = sizeof(int), 1686 .mode = 0644, 1687 .proc_handler = devinet_sysctl_forward, 1688 .extra1 = &ipv4_devconf, 1689 .extra2 = &init_net, 1690 }, 1691 { }, 1692 }; 1693 #endif 1694 1695 static __net_init int devinet_init_net(struct net *net) 1696 { 1697 int err; 1698 struct ipv4_devconf *all, *dflt; 1699 #ifdef CONFIG_SYSCTL 1700 struct ctl_table *tbl = ctl_forward_entry; 1701 struct ctl_table_header *forw_hdr; 1702 #endif 1703 1704 err = -ENOMEM; 1705 all = &ipv4_devconf; 1706 dflt = &ipv4_devconf_dflt; 1707 1708 if (!net_eq(net, &init_net)) { 1709 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); 1710 if (all == NULL) 1711 goto err_alloc_all; 1712 1713 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); 1714 if (dflt == NULL) 1715 goto err_alloc_dflt; 1716 1717 #ifdef CONFIG_SYSCTL 1718 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); 1719 if (tbl == NULL) 1720 goto err_alloc_ctl; 1721 1722 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; 1723 tbl[0].extra1 = all; 1724 tbl[0].extra2 = net; 1725 #endif 1726 } 1727 1728 #ifdef CONFIG_SYSCTL 1729 err = __devinet_sysctl_register(net, "all", all); 1730 if (err < 0) 1731 goto err_reg_all; 1732 1733 err = __devinet_sysctl_register(net, "default", dflt); 1734 if (err < 0) 1735 goto err_reg_dflt; 1736 1737 err = -ENOMEM; 1738 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl); 1739 if (forw_hdr == NULL) 1740 goto err_reg_ctl; 1741 net->ipv4.forw_hdr = forw_hdr; 1742 #endif 1743 1744 net->ipv4.devconf_all = all; 1745 net->ipv4.devconf_dflt = dflt; 1746 return 0; 1747 1748 #ifdef CONFIG_SYSCTL 1749 err_reg_ctl: 1750 __devinet_sysctl_unregister(dflt); 1751 err_reg_dflt: 1752 __devinet_sysctl_unregister(all); 1753 err_reg_all: 1754 if (tbl != ctl_forward_entry) 1755 kfree(tbl); 1756 err_alloc_ctl: 1757 #endif 1758 if (dflt != &ipv4_devconf_dflt) 1759 kfree(dflt); 1760 err_alloc_dflt: 1761 if (all != &ipv4_devconf) 1762 kfree(all); 1763 err_alloc_all: 1764 
return err; 1765 } 1766 1767 static __net_exit void devinet_exit_net(struct net *net) 1768 { 1769 #ifdef CONFIG_SYSCTL 1770 struct ctl_table *tbl; 1771 1772 tbl = net->ipv4.forw_hdr->ctl_table_arg; 1773 unregister_net_sysctl_table(net->ipv4.forw_hdr); 1774 __devinet_sysctl_unregister(net->ipv4.devconf_dflt); 1775 __devinet_sysctl_unregister(net->ipv4.devconf_all); 1776 kfree(tbl); 1777 #endif 1778 kfree(net->ipv4.devconf_dflt); 1779 kfree(net->ipv4.devconf_all); 1780 } 1781 1782 static __net_initdata struct pernet_operations devinet_ops = { 1783 .init = devinet_init_net, 1784 .exit = devinet_exit_net, 1785 }; 1786 1787 static struct rtnl_af_ops inet_af_ops = { 1788 .family = AF_INET, 1789 .fill_link_af = inet_fill_link_af, 1790 .get_link_af_size = inet_get_link_af_size, 1791 .validate_link_af = inet_validate_link_af, 1792 .set_link_af = inet_set_link_af, 1793 }; 1794 1795 void __init devinet_init(void) 1796 { 1797 int i; 1798 1799 for (i = 0; i < IN4_ADDR_HSIZE; i++) 1800 INIT_HLIST_HEAD(&inet_addr_lst[i]); 1801 1802 register_pernet_subsys(&devinet_ops); 1803 1804 register_gifconf(PF_INET, inet_gifconf); 1805 register_netdevice_notifier(&ip_netdev_notifier); 1806 1807 rtnl_af_register(&inet_af_ops); 1808 1809 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); 1810 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); 1811 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); 1812 } 1813 1814