1 /* 2 * NET3 IP device support routines. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Derived from the IP parts of dev.c 1.0.19 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Mark Evans, <evansmp@uhura.aston.ac.uk> 13 * 14 * Additional Authors: 15 * Alan Cox, <gw4pts@gw4pts.ampr.org> 16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 17 * 18 * Changes: 19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr 20 * lists. 21 * Cyrus Durgin: updated for kmod 22 * Matthias Andree: in devinet_ioctl, compare label and 23 * address (4.4BSD alias style support), 24 * fall back to comparing just the label 25 * if no match found. 26 */ 27 28 29 #include <asm/uaccess.h> 30 #include <linux/bitops.h> 31 #include <linux/capability.h> 32 #include <linux/module.h> 33 #include <linux/types.h> 34 #include <linux/kernel.h> 35 #include <linux/string.h> 36 #include <linux/mm.h> 37 #include <linux/socket.h> 38 #include <linux/sockios.h> 39 #include <linux/in.h> 40 #include <linux/errno.h> 41 #include <linux/interrupt.h> 42 #include <linux/if_addr.h> 43 #include <linux/if_ether.h> 44 #include <linux/inet.h> 45 #include <linux/netdevice.h> 46 #include <linux/etherdevice.h> 47 #include <linux/skbuff.h> 48 #include <linux/init.h> 49 #include <linux/notifier.h> 50 #include <linux/inetdevice.h> 51 #include <linux/igmp.h> 52 #include <linux/slab.h> 53 #include <linux/hash.h> 54 #ifdef CONFIG_SYSCTL 55 #include <linux/sysctl.h> 56 #endif 57 #include <linux/kmod.h> 58 59 #include <net/arp.h> 60 #include <net/ip.h> 61 #include <net/route.h> 62 #include <net/ip_fib.h> 63 #include <net/rtnetlink.h> 64 #include <net/net_namespace.h> 65 66 #include "fib_lookup.h" 67 68 static struct ipv4_devconf ipv4_devconf = { 69 .data = { 70 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 71 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 72 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 73 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 74 }, 75 }; 76 77 static struct ipv4_devconf ipv4_devconf_dflt = { 78 .data = { 79 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, 80 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, 81 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, 82 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, 83 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, 84 }, 85 }; 86 87 #define IPV4_DEVCONF_DFLT(net, attr) \ 88 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr) 89 90 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { 91 [IFA_LOCAL] = { .type = NLA_U32 }, 92 [IFA_ADDRESS] = { .type = NLA_U32 }, 93 [IFA_BROADCAST] = { .type = NLA_U32 }, 94 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, 95 }; 96 97 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE 98 * value. So if you change this define, make appropriate changes to 99 * inet_addr_hash as well. 100 */ 101 #define IN4_ADDR_HSIZE 256 102 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; 103 static DEFINE_SPINLOCK(inet_addr_hash_lock); 104 105 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) 106 { 107 u32 val = (__force u32) addr ^ hash_ptr(net, 8); 108 109 return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & 110 (IN4_ADDR_HSIZE - 1)); 111 } 112 113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) 114 { 115 unsigned int hash = inet_addr_hash(net, ifa->ifa_local); 116 117 spin_lock(&inet_addr_hash_lock); 118 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); 119 spin_unlock(&inet_addr_hash_lock); 120 } 121 122 static void inet_hash_remove(struct in_ifaddr *ifa) 123 { 124 spin_lock(&inet_addr_hash_lock); 125 hlist_del_init_rcu(&ifa->hash); 126 spin_unlock(&inet_addr_hash_lock); 127 } 128 129 /** 130 * __ip_dev_find - find the first device with a given source address. 131 * @net: the net namespace 132 * @addr: the source address 133 * @devref: if true, take a reference on the found device 134 * 135 * If a caller uses devref=false, it should be protected by RCU, or RTNL 136 */ 137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) 138 { 139 unsigned int hash = inet_addr_hash(net, addr); 140 struct net_device *result = NULL; 141 struct in_ifaddr *ifa; 142 struct hlist_node *node; 143 144 rcu_read_lock(); 145 hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { 146 struct net_device *dev = ifa->ifa_dev->dev; 147 148 if (!net_eq(dev_net(dev), net)) 149 continue; 150 if (ifa->ifa_local == addr) { 151 result = dev; 152 break; 153 } 154 } 155 if (!result) { 156 struct flowi4 fl4 = { .daddr = addr }; 157 struct fib_result res = { 0 }; 158 struct fib_table *local; 159 160 /* Fallback to FIB local table so that communication 161 * over loopback subnets work. 162 */ 163 local = fib_get_table(net, RT_TABLE_LOCAL); 164 if (local && 165 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && 166 res.type == RTN_LOCAL) 167 result = FIB_RES_DEV(res); 168 } 169 if (result && devref) 170 dev_hold(result); 171 rcu_read_unlock(); 172 return result; 173 } 174 EXPORT_SYMBOL(__ip_dev_find); 175 176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); 177 178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); 179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 180 int destroy); 181 #ifdef CONFIG_SYSCTL 182 static void devinet_sysctl_register(struct in_device *idev); 183 static void devinet_sysctl_unregister(struct in_device *idev); 184 #else 185 static inline void devinet_sysctl_register(struct in_device *idev) 186 { 187 } 188 static inline void devinet_sysctl_unregister(struct in_device *idev) 189 { 190 } 191 #endif 192 193 /* Locks all the inet devices. */ 194 195 static struct in_ifaddr *inet_alloc_ifa(void) 196 { 197 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL); 198 } 199 200 static void inet_rcu_free_ifa(struct rcu_head *head) 201 { 202 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head); 203 if (ifa->ifa_dev) 204 in_dev_put(ifa->ifa_dev); 205 kfree(ifa); 206 } 207 208 static inline void inet_free_ifa(struct in_ifaddr *ifa) 209 { 210 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa); 211 } 212 213 void in_dev_finish_destroy(struct in_device *idev) 214 { 215 struct net_device *dev = idev->dev; 216 217 WARN_ON(idev->ifa_list); 218 WARN_ON(idev->mc_list); 219 #ifdef NET_REFCNT_DEBUG 220 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", 221 idev, dev ? dev->name : "NIL"); 222 #endif 223 dev_put(dev); 224 if (!idev->dead) 225 pr_err("Freeing alive in_device %p\n", idev); 226 else 227 kfree(idev); 228 } 229 EXPORT_SYMBOL(in_dev_finish_destroy); 230 231 static struct in_device *inetdev_init(struct net_device *dev) 232 { 233 struct in_device *in_dev; 234 235 ASSERT_RTNL(); 236 237 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL); 238 if (!in_dev) 239 goto out; 240 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, 241 sizeof(in_dev->cnf)); 242 in_dev->cnf.sysctl = NULL; 243 in_dev->dev = dev; 244 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl); 245 if (!in_dev->arp_parms) 246 goto out_kfree; 247 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) 248 dev_disable_lro(dev); 249 /* Reference in_dev->dev */ 250 dev_hold(dev); 251 /* Account for reference dev->ip_ptr (below) */ 252 in_dev_hold(in_dev); 253 254 devinet_sysctl_register(in_dev); 255 ip_mc_init_dev(in_dev); 256 if (dev->flags & IFF_UP) 257 ip_mc_up(in_dev); 258 259 /* we can receive as soon as ip_ptr is set -- do this last */ 260 rcu_assign_pointer(dev->ip_ptr, in_dev); 261 out: 262 return in_dev; 263 out_kfree: 264 kfree(in_dev); 265 in_dev = NULL; 266 goto out; 267 } 268 269 static void in_dev_rcu_put(struct rcu_head *head) 270 { 271 struct in_device *idev = container_of(head, struct in_device, rcu_head); 272 in_dev_put(idev); 273 } 274 275 static void inetdev_destroy(struct in_device *in_dev) 276 { 277 struct in_ifaddr *ifa; 278 struct net_device *dev; 279 280 ASSERT_RTNL(); 281 282 dev = in_dev->dev; 283 284 in_dev->dead = 1; 285 286 ip_mc_destroy_dev(in_dev); 287 288 while ((ifa = in_dev->ifa_list) != NULL) { 289 inet_del_ifa(in_dev, &in_dev->ifa_list, 0); 290 inet_free_ifa(ifa); 291 } 292 293 RCU_INIT_POINTER(dev->ip_ptr, NULL); 294 295 devinet_sysctl_unregister(in_dev); 296 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 297 arp_ifdown(dev); 298 299 call_rcu(&in_dev->rcu_head, in_dev_rcu_put); 300 } 301 302 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) 303 { 304 rcu_read_lock(); 305 for_primary_ifa(in_dev) { 306 if (inet_ifa_match(a, ifa)) { 307 if (!b || inet_ifa_match(b, ifa)) { 308 rcu_read_unlock(); 309 return 1; 310 } 311 } 312 } endfor_ifa(in_dev); 313 rcu_read_unlock(); 314 return 0; 315 } 316 317 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 318 int destroy, struct nlmsghdr *nlh, u32 pid) 319 { 320 struct in_ifaddr *promote = NULL; 321 struct in_ifaddr *ifa, *ifa1 = *ifap; 322 struct in_ifaddr *last_prim = in_dev->ifa_list; 323 struct in_ifaddr *prev_prom = NULL; 324 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); 325 326 ASSERT_RTNL(); 327 328 /* 1. Deleting primary ifaddr forces deletion all secondaries 329 * unless alias promotion is set 330 **/ 331 332 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { 333 struct in_ifaddr **ifap1 = &ifa1->ifa_next; 334 335 while ((ifa = *ifap1) != NULL) { 336 if (!(ifa->ifa_flags & IFA_F_SECONDARY) && 337 ifa1->ifa_scope <= ifa->ifa_scope) 338 last_prim = ifa; 339 340 if (!(ifa->ifa_flags & IFA_F_SECONDARY) || 341 ifa1->ifa_mask != ifa->ifa_mask || 342 !inet_ifa_match(ifa1->ifa_address, ifa)) { 343 ifap1 = &ifa->ifa_next; 344 prev_prom = ifa; 345 continue; 346 } 347 348 if (!do_promote) { 349 inet_hash_remove(ifa); 350 *ifap1 = ifa->ifa_next; 351 352 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); 353 blocking_notifier_call_chain(&inetaddr_chain, 354 NETDEV_DOWN, ifa); 355 inet_free_ifa(ifa); 356 } else { 357 promote = ifa; 358 break; 359 } 360 } 361 } 362 363 /* On promotion all secondaries from subnet are changing 364 * the primary IP, we must remove all their routes silently 365 * and later to add them back with new prefsrc. Do this 366 * while all addresses are on the device list. 367 */ 368 for (ifa = promote; ifa; ifa = ifa->ifa_next) { 369 if (ifa1->ifa_mask == ifa->ifa_mask && 370 inet_ifa_match(ifa1->ifa_address, ifa)) 371 fib_del_ifaddr(ifa, ifa1); 372 } 373 374 /* 2. Unlink it */ 375 376 *ifap = ifa1->ifa_next; 377 inet_hash_remove(ifa1); 378 379 /* 3. Announce address deletion */ 380 381 /* Send message first, then call notifier. 382 At first sight, FIB update triggered by notifier 383 will refer to already deleted ifaddr, that could confuse 384 netlink listeners. It is not true: look, gated sees 385 that route deleted and if it still thinks that ifaddr 386 is valid, it will try to restore deleted routes... Grr. 387 So that, this order is correct. 388 */ 389 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); 390 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); 391 392 if (promote) { 393 struct in_ifaddr *next_sec = promote->ifa_next; 394 395 if (prev_prom) { 396 prev_prom->ifa_next = promote->ifa_next; 397 promote->ifa_next = last_prim->ifa_next; 398 last_prim->ifa_next = promote; 399 } 400 401 promote->ifa_flags &= ~IFA_F_SECONDARY; 402 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); 403 blocking_notifier_call_chain(&inetaddr_chain, 404 NETDEV_UP, promote); 405 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { 406 if (ifa1->ifa_mask != ifa->ifa_mask || 407 !inet_ifa_match(ifa1->ifa_address, ifa)) 408 continue; 409 fib_add_ifaddr(ifa); 410 } 411 412 } 413 if (destroy) 414 inet_free_ifa(ifa1); 415 } 416 417 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, 418 int destroy) 419 { 420 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); 421 } 422 423 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, 424 u32 pid) 425 { 426 struct in_device *in_dev = ifa->ifa_dev; 427 struct in_ifaddr *ifa1, **ifap, **last_primary; 428 429 ASSERT_RTNL(); 430 431 if (!ifa->ifa_local) { 432 inet_free_ifa(ifa); 433 return 0; 434 } 435 436 ifa->ifa_flags &= ~IFA_F_SECONDARY; 437 last_primary = &in_dev->ifa_list; 438 439 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; 440 ifap = &ifa1->ifa_next) { 441 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && 442 ifa->ifa_scope <= ifa1->ifa_scope) 443 last_primary = &ifa1->ifa_next; 444 if (ifa1->ifa_mask == ifa->ifa_mask && 445 inet_ifa_match(ifa1->ifa_address, ifa)) { 446 if (ifa1->ifa_local == ifa->ifa_local) { 447 inet_free_ifa(ifa); 448 return -EEXIST; 449 } 450 if (ifa1->ifa_scope != ifa->ifa_scope) { 451 inet_free_ifa(ifa); 452 return -EINVAL; 453 } 454 ifa->ifa_flags |= IFA_F_SECONDARY; 455 } 456 } 457 458 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) { 459 net_srandom(ifa->ifa_local); 460 ifap = last_primary; 461 } 462 463 ifa->ifa_next = *ifap; 464 *ifap = ifa; 465 466 inet_hash_insert(dev_net(in_dev->dev), ifa); 467 468 /* Send message first, then call notifier. 469 Notifier will trigger FIB update, so that 470 listeners of netlink will know about new ifaddr */ 471 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); 472 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 473 474 return 0; 475 } 476 477 static int inet_insert_ifa(struct in_ifaddr *ifa) 478 { 479 return __inet_insert_ifa(ifa, NULL, 0); 480 } 481 482 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 483 { 484 struct in_device *in_dev = __in_dev_get_rtnl(dev); 485 486 ASSERT_RTNL(); 487 488 if (!in_dev) { 489 inet_free_ifa(ifa); 490 return -ENOBUFS; 491 } 492 ipv4_devconf_setall(in_dev); 493 if (ifa->ifa_dev != in_dev) { 494 WARN_ON(ifa->ifa_dev); 495 in_dev_hold(in_dev); 496 ifa->ifa_dev = in_dev; 497 } 498 if (ipv4_is_loopback(ifa->ifa_local)) 499 ifa->ifa_scope = RT_SCOPE_HOST; 500 return inet_insert_ifa(ifa); 501 } 502 503 /* Caller must hold RCU or RTNL : 504 * We dont take a reference on found in_device 505 */ 506 struct in_device *inetdev_by_index(struct net *net, int ifindex) 507 { 508 struct net_device *dev; 509 struct in_device *in_dev = NULL; 510 511 rcu_read_lock(); 512 dev = dev_get_by_index_rcu(net, ifindex); 513 if (dev) 514 in_dev = rcu_dereference_rtnl(dev->ip_ptr); 515 rcu_read_unlock(); 516 return in_dev; 517 } 518 EXPORT_SYMBOL(inetdev_by_index); 519 520 /* Called only from RTNL semaphored context. No locks. */ 521 522 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, 523 __be32 mask) 524 { 525 ASSERT_RTNL(); 526 527 for_primary_ifa(in_dev) { 528 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa)) 529 return ifa; 530 } endfor_ifa(in_dev); 531 return NULL; 532 } 533 534 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 535 { 536 struct net *net = sock_net(skb->sk); 537 struct nlattr *tb[IFA_MAX+1]; 538 struct in_device *in_dev; 539 struct ifaddrmsg *ifm; 540 struct in_ifaddr *ifa, **ifap; 541 int err = -EINVAL; 542 543 ASSERT_RTNL(); 544 545 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 546 if (err < 0) 547 goto errout; 548 549 ifm = nlmsg_data(nlh); 550 in_dev = inetdev_by_index(net, ifm->ifa_index); 551 if (in_dev == NULL) { 552 err = -ENODEV; 553 goto errout; 554 } 555 556 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 557 ifap = &ifa->ifa_next) { 558 if (tb[IFA_LOCAL] && 559 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL])) 560 continue; 561 562 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) 563 continue; 564 565 if (tb[IFA_ADDRESS] && 566 (ifm->ifa_prefixlen != ifa->ifa_prefixlen || 567 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) 568 continue; 569 570 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); 571 return 0; 572 } 573 574 err = -EADDRNOTAVAIL; 575 errout: 576 return err; 577 } 578 579 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) 580 { 581 struct nlattr *tb[IFA_MAX+1]; 582 struct in_ifaddr *ifa; 583 struct ifaddrmsg *ifm; 584 struct net_device *dev; 585 struct in_device *in_dev; 586 int err; 587 588 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 589 if (err < 0) 590 goto errout; 591 592 ifm = nlmsg_data(nlh); 593 err = -EINVAL; 594 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) 595 goto errout; 596 597 dev = __dev_get_by_index(net, ifm->ifa_index); 598 err = -ENODEV; 599 if (dev == NULL) 600 goto errout; 601 602 in_dev = __in_dev_get_rtnl(dev); 603 err = -ENOBUFS; 604 if (in_dev == NULL) 605 goto errout; 606 607 ifa = inet_alloc_ifa(); 608 if (ifa == NULL) 609 /* 610 * A potential indev allocation can be left alive, it stays 611 * assigned to its device and is destroy with it. 612 */ 613 goto errout; 614 615 ipv4_devconf_setall(in_dev); 616 in_dev_hold(in_dev); 617 618 if (tb[IFA_ADDRESS] == NULL) 619 tb[IFA_ADDRESS] = tb[IFA_LOCAL]; 620 621 INIT_HLIST_NODE(&ifa->hash); 622 ifa->ifa_prefixlen = ifm->ifa_prefixlen; 623 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); 624 ifa->ifa_flags = ifm->ifa_flags; 625 ifa->ifa_scope = ifm->ifa_scope; 626 ifa->ifa_dev = in_dev; 627 628 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]); 629 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]); 630 631 if (tb[IFA_BROADCAST]) 632 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); 633 634 if (tb[IFA_LABEL]) 635 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); 636 else 637 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 638 639 return ifa; 640 641 errout: 642 return ERR_PTR(err); 643 } 644 645 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 646 { 647 struct net *net = sock_net(skb->sk); 648 struct in_ifaddr *ifa; 649 650 ASSERT_RTNL(); 651 652 ifa = rtm_to_ifaddr(net, nlh); 653 if (IS_ERR(ifa)) 654 return PTR_ERR(ifa); 655 656 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); 657 } 658 659 /* 660 * Determine a default network mask, based on the IP address. 661 */ 662 663 static inline int inet_abc_len(__be32 addr) 664 { 665 int rc = -1; /* Something else, probably a multicast. */ 666 667 if (ipv4_is_zeronet(addr)) 668 rc = 0; 669 else { 670 __u32 haddr = ntohl(addr); 671 672 if (IN_CLASSA(haddr)) 673 rc = 8; 674 else if (IN_CLASSB(haddr)) 675 rc = 16; 676 else if (IN_CLASSC(haddr)) 677 rc = 24; 678 } 679 680 return rc; 681 } 682 683 684 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) 685 { 686 struct ifreq ifr; 687 struct sockaddr_in sin_orig; 688 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; 689 struct in_device *in_dev; 690 struct in_ifaddr **ifap = NULL; 691 struct in_ifaddr *ifa = NULL; 692 struct net_device *dev; 693 char *colon; 694 int ret = -EFAULT; 695 int tryaddrmatch = 0; 696 697 /* 698 * Fetch the caller's info block into kernel space 699 */ 700 701 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) 702 goto out; 703 ifr.ifr_name[IFNAMSIZ - 1] = 0; 704 705 /* save original address for comparison */ 706 memcpy(&sin_orig, sin, sizeof(*sin)); 707 708 colon = strchr(ifr.ifr_name, ':'); 709 if (colon) 710 *colon = 0; 711 712 dev_load(net, ifr.ifr_name); 713 714 switch (cmd) { 715 case SIOCGIFADDR: /* Get interface address */ 716 case SIOCGIFBRDADDR: /* Get the broadcast address */ 717 case SIOCGIFDSTADDR: /* Get the destination address */ 718 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 719 /* Note that these ioctls will not sleep, 720 so that we do not impose a lock. 721 One day we will be forced to put shlock here (I mean SMP) 722 */ 723 tryaddrmatch = (sin_orig.sin_family == AF_INET); 724 memset(sin, 0, sizeof(*sin)); 725 sin->sin_family = AF_INET; 726 break; 727 728 case SIOCSIFFLAGS: 729 ret = -EACCES; 730 if (!capable(CAP_NET_ADMIN)) 731 goto out; 732 break; 733 case SIOCSIFADDR: /* Set interface address (and family) */ 734 case SIOCSIFBRDADDR: /* Set the broadcast address */ 735 case SIOCSIFDSTADDR: /* Set the destination address */ 736 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 737 ret = -EACCES; 738 if (!capable(CAP_NET_ADMIN)) 739 goto out; 740 ret = -EINVAL; 741 if (sin->sin_family != AF_INET) 742 goto out; 743 break; 744 default: 745 ret = -EINVAL; 746 goto out; 747 } 748 749 rtnl_lock(); 750 751 ret = -ENODEV; 752 dev = __dev_get_by_name(net, ifr.ifr_name); 753 if (!dev) 754 goto done; 755 756 if (colon) 757 *colon = ':'; 758 759 in_dev = __in_dev_get_rtnl(dev); 760 if (in_dev) { 761 if (tryaddrmatch) { 762 /* Matthias Andree */ 763 /* compare label and address (4.4BSD style) */ 764 /* note: we only do this for a limited set of ioctls 765 and only if the original address family was AF_INET. 766 This is checked above. */ 767 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 768 ifap = &ifa->ifa_next) { 769 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 770 sin_orig.sin_addr.s_addr == 771 ifa->ifa_local) { 772 break; /* found */ 773 } 774 } 775 } 776 /* we didn't get a match, maybe the application is 777 4.3BSD-style and passed in junk so we fall back to 778 comparing just the label */ 779 if (!ifa) { 780 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 781 ifap = &ifa->ifa_next) 782 if (!strcmp(ifr.ifr_name, ifa->ifa_label)) 783 break; 784 } 785 } 786 787 ret = -EADDRNOTAVAIL; 788 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) 789 goto done; 790 791 switch (cmd) { 792 case SIOCGIFADDR: /* Get interface address */ 793 sin->sin_addr.s_addr = ifa->ifa_local; 794 goto rarok; 795 796 case SIOCGIFBRDADDR: /* Get the broadcast address */ 797 sin->sin_addr.s_addr = ifa->ifa_broadcast; 798 goto rarok; 799 800 case SIOCGIFDSTADDR: /* Get the destination address */ 801 sin->sin_addr.s_addr = ifa->ifa_address; 802 goto rarok; 803 804 case SIOCGIFNETMASK: /* Get the netmask for the interface */ 805 sin->sin_addr.s_addr = ifa->ifa_mask; 806 goto rarok; 807 808 case SIOCSIFFLAGS: 809 if (colon) { 810 ret = -EADDRNOTAVAIL; 811 if (!ifa) 812 break; 813 ret = 0; 814 if (!(ifr.ifr_flags & IFF_UP)) 815 inet_del_ifa(in_dev, ifap, 1); 816 break; 817 } 818 ret = dev_change_flags(dev, ifr.ifr_flags); 819 break; 820 821 case SIOCSIFADDR: /* Set interface address (and family) */ 822 ret = -EINVAL; 823 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 824 break; 825 826 if (!ifa) { 827 ret = -ENOBUFS; 828 ifa = inet_alloc_ifa(); 829 INIT_HLIST_NODE(&ifa->hash); 830 if (!ifa) 831 break; 832 if (colon) 833 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); 834 else 835 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 836 } else { 837 ret = 0; 838 if (ifa->ifa_local == sin->sin_addr.s_addr) 839 break; 840 inet_del_ifa(in_dev, ifap, 0); 841 ifa->ifa_broadcast = 0; 842 ifa->ifa_scope = 0; 843 } 844 845 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; 846 847 if (!(dev->flags & IFF_POINTOPOINT)) { 848 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); 849 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); 850 if ((dev->flags & IFF_BROADCAST) && 851 ifa->ifa_prefixlen < 31) 852 ifa->ifa_broadcast = ifa->ifa_address | 853 ~ifa->ifa_mask; 854 } else { 855 ifa->ifa_prefixlen = 32; 856 ifa->ifa_mask = inet_make_mask(32); 857 } 858 ret = inet_set_ifa(dev, ifa); 859 break; 860 861 case SIOCSIFBRDADDR: /* Set the broadcast address */ 862 ret = 0; 863 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { 864 inet_del_ifa(in_dev, ifap, 0); 865 ifa->ifa_broadcast = sin->sin_addr.s_addr; 866 inet_insert_ifa(ifa); 867 } 868 break; 869 870 case SIOCSIFDSTADDR: /* Set the destination address */ 871 ret = 0; 872 if (ifa->ifa_address == sin->sin_addr.s_addr) 873 break; 874 ret = -EINVAL; 875 if (inet_abc_len(sin->sin_addr.s_addr) < 0) 876 break; 877 ret = 0; 878 inet_del_ifa(in_dev, ifap, 0); 879 ifa->ifa_address = sin->sin_addr.s_addr; 880 inet_insert_ifa(ifa); 881 break; 882 883 case SIOCSIFNETMASK: /* Set the netmask for the interface */ 884 885 /* 886 * The mask we set must be legal. 887 */ 888 ret = -EINVAL; 889 if (bad_mask(sin->sin_addr.s_addr, 0)) 890 break; 891 ret = 0; 892 if (ifa->ifa_mask != sin->sin_addr.s_addr) { 893 __be32 old_mask = ifa->ifa_mask; 894 inet_del_ifa(in_dev, ifap, 0); 895 ifa->ifa_mask = sin->sin_addr.s_addr; 896 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); 897 898 /* See if current broadcast address matches 899 * with current netmask, then recalculate 900 * the broadcast address. Otherwise it's a 901 * funny address, so don't touch it since 902 * the user seems to know what (s)he's doing... 903 */ 904 if ((dev->flags & IFF_BROADCAST) && 905 (ifa->ifa_prefixlen < 31) && 906 (ifa->ifa_broadcast == 907 (ifa->ifa_local|~old_mask))) { 908 ifa->ifa_broadcast = (ifa->ifa_local | 909 ~sin->sin_addr.s_addr); 910 } 911 inet_insert_ifa(ifa); 912 } 913 break; 914 } 915 done: 916 rtnl_unlock(); 917 out: 918 return ret; 919 rarok: 920 rtnl_unlock(); 921 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0; 922 goto out; 923 } 924 925 static int inet_gifconf(struct net_device *dev, char __user *buf, int len) 926 { 927 struct in_device *in_dev = __in_dev_get_rtnl(dev); 928 struct in_ifaddr *ifa; 929 struct ifreq ifr; 930 int done = 0; 931 932 if (!in_dev) 933 goto out; 934 935 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 936 if (!buf) { 937 done += sizeof(ifr); 938 continue; 939 } 940 if (len < (int) sizeof(ifr)) 941 break; 942 memset(&ifr, 0, sizeof(struct ifreq)); 943 if (ifa->ifa_label) 944 strcpy(ifr.ifr_name, ifa->ifa_label); 945 else 946 strcpy(ifr.ifr_name, dev->name); 947 948 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; 949 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = 950 ifa->ifa_local; 951 952 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { 953 done = -EFAULT; 954 break; 955 } 956 buf += sizeof(struct ifreq); 957 len -= sizeof(struct ifreq); 958 done += sizeof(struct ifreq); 959 } 960 out: 961 return done; 962 } 963 964 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) 965 { 966 __be32 addr = 0; 967 struct in_device *in_dev; 968 struct net *net = dev_net(dev); 969 970 rcu_read_lock(); 971 in_dev = __in_dev_get_rcu(dev); 972 if (!in_dev) 973 goto no_in_dev; 974 975 for_primary_ifa(in_dev) { 976 if (ifa->ifa_scope > scope) 977 continue; 978 if (!dst || inet_ifa_match(dst, ifa)) { 979 addr = ifa->ifa_local; 980 break; 981 } 982 if (!addr) 983 addr = ifa->ifa_local; 984 } endfor_ifa(in_dev); 985 986 if (addr) 987 goto out_unlock; 988 no_in_dev: 989 990 /* Not loopback addresses on loopback should be preferred 991 in this case. It is importnat that lo is the first interface 992 in dev_base list. 993 */ 994 for_each_netdev_rcu(net, dev) { 995 in_dev = __in_dev_get_rcu(dev); 996 if (!in_dev) 997 continue; 998 999 for_primary_ifa(in_dev) { 1000 if (ifa->ifa_scope != RT_SCOPE_LINK && 1001 ifa->ifa_scope <= scope) { 1002 addr = ifa->ifa_local; 1003 goto out_unlock; 1004 } 1005 } endfor_ifa(in_dev); 1006 } 1007 out_unlock: 1008 rcu_read_unlock(); 1009 return addr; 1010 } 1011 EXPORT_SYMBOL(inet_select_addr); 1012 1013 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, 1014 __be32 local, int scope) 1015 { 1016 int same = 0; 1017 __be32 addr = 0; 1018 1019 for_ifa(in_dev) { 1020 if (!addr && 1021 (local == ifa->ifa_local || !local) && 1022 ifa->ifa_scope <= scope) { 1023 addr = ifa->ifa_local; 1024 if (same) 1025 break; 1026 } 1027 if (!same) { 1028 same = (!local || inet_ifa_match(local, ifa)) && 1029 (!dst || inet_ifa_match(dst, ifa)); 1030 if (same && addr) { 1031 if (local || !dst) 1032 break; 1033 /* Is the selected addr into dst subnet? */ 1034 if (inet_ifa_match(addr, ifa)) 1035 break; 1036 /* No, then can we use new local src? */ 1037 if (ifa->ifa_scope <= scope) { 1038 addr = ifa->ifa_local; 1039 break; 1040 } 1041 /* search for large dst subnet for addr */ 1042 same = 0; 1043 } 1044 } 1045 } endfor_ifa(in_dev); 1046 1047 return same ? addr : 0; 1048 } 1049 1050 /* 1051 * Confirm that local IP address exists using wildcards: 1052 * - in_dev: only on this interface, 0=any interface 1053 * - dst: only in the same subnet as dst, 0=any dst 1054 * - local: address, 0=autoselect the local address 1055 * - scope: maximum allowed scope value for the local address 1056 */ 1057 __be32 inet_confirm_addr(struct in_device *in_dev, 1058 __be32 dst, __be32 local, int scope) 1059 { 1060 __be32 addr = 0; 1061 struct net_device *dev; 1062 struct net *net; 1063 1064 if (scope != RT_SCOPE_LINK) 1065 return confirm_addr_indev(in_dev, dst, local, scope); 1066 1067 net = dev_net(in_dev->dev); 1068 rcu_read_lock(); 1069 for_each_netdev_rcu(net, dev) { 1070 in_dev = __in_dev_get_rcu(dev); 1071 if (in_dev) { 1072 addr = confirm_addr_indev(in_dev, dst, local, scope); 1073 if (addr) 1074 break; 1075 } 1076 } 1077 rcu_read_unlock(); 1078 1079 return addr; 1080 } 1081 1082 /* 1083 * Device notifier 1084 */ 1085 1086 int register_inetaddr_notifier(struct notifier_block *nb) 1087 { 1088 return blocking_notifier_chain_register(&inetaddr_chain, nb); 1089 } 1090 EXPORT_SYMBOL(register_inetaddr_notifier); 1091 1092 int unregister_inetaddr_notifier(struct notifier_block *nb) 1093 { 1094 return blocking_notifier_chain_unregister(&inetaddr_chain, nb); 1095 } 1096 EXPORT_SYMBOL(unregister_inetaddr_notifier); 1097 1098 /* Rename ifa_labels for a device name change. Make some effort to preserve 1099 * existing alias numbering and to create unique labels if possible. 1100 */ 1101 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) 1102 { 1103 struct in_ifaddr *ifa; 1104 int named = 0; 1105 1106 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 1107 char old[IFNAMSIZ], *dot; 1108 1109 memcpy(old, ifa->ifa_label, IFNAMSIZ); 1110 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1111 if (named++ == 0) 1112 goto skip; 1113 dot = strchr(old, ':'); 1114 if (dot == NULL) { 1115 sprintf(old, ":%d", named); 1116 dot = old; 1117 } 1118 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) 1119 strcat(ifa->ifa_label, dot); 1120 else 1121 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 1122 skip: 1123 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 1124 } 1125 } 1126 1127 static inline bool inetdev_valid_mtu(unsigned mtu) 1128 { 1129 return mtu >= 68; 1130 } 1131 1132 static void inetdev_send_gratuitous_arp(struct net_device *dev, 1133 struct in_device *in_dev) 1134 1135 { 1136 struct in_ifaddr *ifa; 1137 1138 for (ifa = in_dev->ifa_list; ifa; 1139 ifa = ifa->ifa_next) { 1140 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1141 ifa->ifa_local, dev, 1142 ifa->ifa_local, NULL, 1143 dev->dev_addr, NULL); 1144 } 1145 } 1146 1147 /* Called only under RTNL semaphore */ 1148 1149 static int inetdev_event(struct notifier_block *this, unsigned long event, 1150 void *ptr) 1151 { 1152 struct net_device *dev = ptr; 1153 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1154 1155 ASSERT_RTNL(); 1156 1157 if (!in_dev) { 1158 if (event == NETDEV_REGISTER) { 1159 in_dev = inetdev_init(dev); 1160 if (!in_dev) 1161 return notifier_from_errno(-ENOMEM); 1162 if (dev->flags & IFF_LOOPBACK) { 1163 IN_DEV_CONF_SET(in_dev, NOXFRM, 1); 1164 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); 1165 } 1166 } else if (event == NETDEV_CHANGEMTU) { 1167 /* Re-enabling IP */ 1168 if (inetdev_valid_mtu(dev->mtu)) 1169 in_dev = inetdev_init(dev); 1170 } 1171 goto out; 1172 } 1173 1174 switch (event) { 1175 case NETDEV_REGISTER: 1176 printk(KERN_DEBUG "inetdev_event: bug\n"); 1177 RCU_INIT_POINTER(dev->ip_ptr, NULL); 1178 break; 1179 case NETDEV_UP: 1180 if (!inetdev_valid_mtu(dev->mtu)) 1181 break; 1182 if (dev->flags & IFF_LOOPBACK) { 1183 struct in_ifaddr *ifa = inet_alloc_ifa(); 1184 1185 if (ifa) { 1186 INIT_HLIST_NODE(&ifa->hash); 1187 ifa->ifa_local = 1188 ifa->ifa_address = htonl(INADDR_LOOPBACK); 1189 ifa->ifa_prefixlen = 8; 1190 ifa->ifa_mask = inet_make_mask(8); 1191 in_dev_hold(in_dev); 1192 ifa->ifa_dev = in_dev; 1193 ifa->ifa_scope = RT_SCOPE_HOST; 1194 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 1195 inet_insert_ifa(ifa); 1196 } 1197 } 1198 ip_mc_up(in_dev); 1199 /* fall through */ 1200 case NETDEV_CHANGEADDR: 1201 if (!IN_DEV_ARP_NOTIFY(in_dev)) 1202 break; 1203 /* fall through */ 1204 case NETDEV_NOTIFY_PEERS: 1205 /* Send gratuitous ARP to notify of link change */ 1206 inetdev_send_gratuitous_arp(dev, in_dev); 1207 break; 1208 case NETDEV_DOWN: 1209 ip_mc_down(in_dev); 1210 break; 1211 case NETDEV_PRE_TYPE_CHANGE: 1212 ip_mc_unmap(in_dev); 1213 break; 1214 case NETDEV_POST_TYPE_CHANGE: 1215 ip_mc_remap(in_dev); 1216 break; 1217 case NETDEV_CHANGEMTU: 1218 if (inetdev_valid_mtu(dev->mtu)) 1219 break; 1220 /* disable IP when MTU is not enough */ 1221 case NETDEV_UNREGISTER: 1222 inetdev_destroy(in_dev); 1223 break; 1224 case NETDEV_CHANGENAME: 1225 /* Do not notify about label change, this event is 1226 * not interesting to applications using netlink. 1227 */ 1228 inetdev_changename(dev, in_dev); 1229 1230 devinet_sysctl_unregister(in_dev); 1231 devinet_sysctl_register(in_dev); 1232 break; 1233 } 1234 out: 1235 return NOTIFY_DONE; 1236 } 1237 1238 static struct notifier_block ip_netdev_notifier = { 1239 .notifier_call = inetdev_event, 1240 }; 1241 1242 static inline size_t inet_nlmsg_size(void) 1243 { 1244 return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) 1245 + nla_total_size(4) /* IFA_ADDRESS */ 1246 + nla_total_size(4) /* IFA_LOCAL */ 1247 + nla_total_size(4) /* IFA_BROADCAST */ 1248 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ 1249 } 1250 1251 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, 1252 u32 pid, u32 seq, int event, unsigned int flags) 1253 { 1254 struct ifaddrmsg *ifm; 1255 struct nlmsghdr *nlh; 1256 1257 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); 1258 if (nlh == NULL) 1259 return -EMSGSIZE; 1260 1261 ifm = nlmsg_data(nlh); 1262 ifm->ifa_family = AF_INET; 1263 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1264 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; 1265 ifm->ifa_scope = ifa->ifa_scope; 1266 ifm->ifa_index = ifa->ifa_dev->dev->ifindex; 1267 1268 if (ifa->ifa_address) 1269 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address); 1270 1271 if (ifa->ifa_local) 1272 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local); 1273 1274 if (ifa->ifa_broadcast) 1275 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); 1276 1277 if (ifa->ifa_label[0]) 1278 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); 1279 1280 return nlmsg_end(skb, nlh); 1281 1282 nla_put_failure: 1283 nlmsg_cancel(skb, nlh); 1284 return -EMSGSIZE; 1285 } 1286 1287 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1288 { 1289 struct net *net = sock_net(skb->sk); 1290 int h, s_h; 1291 int idx, s_idx; 1292 int ip_idx, s_ip_idx; 1293 struct net_device *dev; 1294 struct in_device *in_dev; 1295 struct in_ifaddr *ifa; 1296 struct hlist_head *head; 1297 struct hlist_node *node; 1298 1299 s_h = cb->args[0]; 1300 s_idx = idx = cb->args[1]; 1301 s_ip_idx = ip_idx = cb->args[2]; 1302 1303 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1304 idx = 0; 1305 head = &net->dev_index_head[h]; 1306 rcu_read_lock(); 1307 hlist_for_each_entry_rcu(dev, node, head, index_hlist) { 1308 if (idx < s_idx) 1309 goto cont; 1310 if (h > s_h || idx > s_idx) 1311 s_ip_idx = 0; 1312 in_dev = __in_dev_get_rcu(dev); 1313 if (!in_dev) 1314 goto cont; 1315 1316 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; 1317 ifa = ifa->ifa_next, ip_idx++) { 1318 if (ip_idx < s_ip_idx) 1319 continue; 1320 if (inet_fill_ifaddr(skb, ifa, 1321 NETLINK_CB(cb->skb).pid, 1322 cb->nlh->nlmsg_seq, 1323 RTM_NEWADDR, NLM_F_MULTI) <= 0) { 1324 rcu_read_unlock(); 1325 goto done; 1326 } 1327 } 1328 cont: 1329 idx++; 1330 } 1331 rcu_read_unlock(); 1332 } 1333 1334 done: 1335 cb->args[0] = h; 1336 cb->args[1] = idx; 1337 cb->args[2] = ip_idx; 1338 1339 return skb->len; 1340 } 1341 1342 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, 1343 u32 pid) 1344 { 1345 struct sk_buff *skb; 1346 u32 seq = nlh ? nlh->nlmsg_seq : 0; 1347 int err = -ENOBUFS; 1348 struct net *net; 1349 1350 net = dev_net(ifa->ifa_dev->dev); 1351 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); 1352 if (skb == NULL) 1353 goto errout; 1354 1355 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); 1356 if (err < 0) { 1357 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ 1358 WARN_ON(err == -EMSGSIZE); 1359 kfree_skb(skb); 1360 goto errout; 1361 } 1362 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1363 return; 1364 errout: 1365 if (err < 0) 1366 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); 1367 } 1368 1369 static size_t inet_get_link_af_size(const struct net_device *dev) 1370 { 1371 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); 1372 1373 if (!in_dev) 1374 return 0; 1375 1376 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ 1377 } 1378 1379 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) 1380 { 1381 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); 1382 struct nlattr *nla; 1383 int i; 1384 1385 if (!in_dev) 1386 return -ENODATA; 1387 1388 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4); 1389 if (nla == NULL) 1390 return -EMSGSIZE; 1391 1392 for (i = 0; i < IPV4_DEVCONF_MAX; i++) 1393 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i]; 1394 1395 return 0; 1396 } 1397 1398 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { 1399 [IFLA_INET_CONF] = { .type = NLA_NESTED }, 1400 }; 1401 1402 static int inet_validate_link_af(const struct net_device *dev, 1403 const struct nlattr *nla) 1404 { 1405 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1406 int err, rem; 1407 1408 if (dev && !__in_dev_get_rtnl(dev)) 1409 return -EAFNOSUPPORT; 1410 1411 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); 1412 if (err < 0) 1413 return err; 1414 1415 if (tb[IFLA_INET_CONF]) { 1416 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { 1417 int cfgid = nla_type(a); 1418 1419 if (nla_len(a) < 4) 1420 return -EINVAL; 1421 1422 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) 1423 return -EINVAL; 1424 } 1425 } 1426 1427 return 0; 1428 } 1429 1430 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) 1431 { 1432 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1433 struct nlattr *a, *tb[IFLA_INET_MAX+1]; 1434 int rem; 1435 1436 if (!in_dev) 1437 return -EAFNOSUPPORT; 1438 1439 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) 1440 BUG(); 1441 1442 if (tb[IFLA_INET_CONF]) { 1443 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) 1444 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); 1445 } 1446 1447 return 0; 1448 } 1449 1450 #ifdef CONFIG_SYSCTL 1451 1452 static void devinet_copy_dflt_conf(struct net *net, int i) 1453 { 1454 struct net_device *dev; 1455 1456 rcu_read_lock(); 1457 for_each_netdev_rcu(net, dev) { 1458 struct in_device *in_dev; 1459 1460 in_dev = __in_dev_get_rcu(dev); 1461 if (in_dev && !test_bit(i, in_dev->cnf.state)) 1462 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; 1463 } 1464 rcu_read_unlock(); 1465 } 1466 1467 /* called with RTNL locked */ 1468 static void inet_forward_change(struct net *net) 1469 { 1470 struct net_device *dev; 1471 int on = IPV4_DEVCONF_ALL(net, FORWARDING); 1472 1473 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 1474 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 1475 1476 for_each_netdev(net, dev) { 1477 struct in_device *in_dev; 1478 if (on) 1479 dev_disable_lro(dev); 1480 rcu_read_lock(); 1481 in_dev = __in_dev_get_rcu(dev); 1482 if (in_dev) 1483 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 1484 rcu_read_unlock(); 1485 } 1486 } 1487 1488 static int devinet_conf_proc(ctl_table *ctl, int write, 1489 void __user *buffer, 1490 size_t *lenp, loff_t *ppos) 1491 { 1492 int old_value = *(int *)ctl->data; 1493 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1494 int new_value = *(int *)ctl->data; 1495 1496 if (write) { 1497 struct ipv4_devconf *cnf = ctl->extra1; 1498 struct net *net = ctl->extra2; 1499 int i = (int *)ctl->data - cnf->data; 1500 1501 set_bit(i, cnf->state); 1502 1503 if (cnf == net->ipv4.devconf_dflt) 1504 devinet_copy_dflt_conf(net, i); 1505 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1) 1506 if ((new_value == 0) && (old_value != 0)) 1507 rt_cache_flush(net, 0); 1508 } 1509 1510 return ret; 1511 } 1512 1513 static int devinet_sysctl_forward(ctl_table *ctl, int write, 1514 void __user *buffer, 1515 size_t *lenp, loff_t *ppos) 1516 { 1517 int *valp = ctl->data; 1518 int val = *valp; 1519 loff_t pos = *ppos; 1520 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1521 1522 if (write && *valp != val) { 1523 struct net *net = ctl->extra2; 1524 1525 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) { 1526 if (!rtnl_trylock()) { 1527 /* Restore the original values before restarting */ 1528 *valp = val; 1529 *ppos = pos; 1530 return restart_syscall(); 1531 } 1532 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { 1533 inet_forward_change(net); 1534 } else if (*valp) { 1535 struct ipv4_devconf *cnf = ctl->extra1; 1536 struct in_device *idev = 1537 container_of(cnf, struct in_device, cnf); 1538 dev_disable_lro(idev->dev); 1539 } 1540 rtnl_unlock(); 1541 rt_cache_flush(net, 0); 1542 } 1543 } 1544 1545 return ret; 1546 } 1547 1548 static int ipv4_doint_and_flush(ctl_table *ctl, int write, 1549 void __user *buffer, 1550 size_t *lenp, loff_t *ppos) 1551 { 1552 int *valp = ctl->data; 1553 int val = *valp; 1554 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1555 struct net *net = ctl->extra2; 1556 1557 if (write && *valp != val) 1558 rt_cache_flush(net, 0); 1559 1560 return ret; 1561 } 1562 1563 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ 1564 { \ 1565 .procname = name, \ 1566 .data = ipv4_devconf.data + \ 1567 IPV4_DEVCONF_ ## attr - 1, \ 1568 .maxlen = sizeof(int), \ 1569 .mode = mval, \ 1570 .proc_handler = proc, \ 1571 .extra1 = &ipv4_devconf, \ 1572 } 1573 1574 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ 1575 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) 1576 1577 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ 1578 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) 1579 1580 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ 1581 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) 1582 1583 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ 1584 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) 1585 1586 static struct devinet_sysctl_table { 1587 struct ctl_table_header *sysctl_header; 1588 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX]; 1589 char *dev_name; 1590 } devinet_sysctl = { 1591 .devinet_vars = { 1592 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", 1593 devinet_sysctl_forward), 1594 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), 1595 1596 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), 1597 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"), 1598 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"), 1599 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"), 1600 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), 1601 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, 1602 "accept_source_route"), 1603 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), 1604 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), 1605 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), 1606 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), 1607 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), 1608 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"), 1609 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"), 1610 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"), 1611 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), 1612 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 1613 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 1614 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), 1615 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), 1616 1617 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 1618 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 1619 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION, 1620 "force_igmp_version"), 1621 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, 1622 "promote_secondaries"), 1623 }, 1624 }; 1625 1626 static int __devinet_sysctl_register(struct net *net, char *dev_name, 1627 struct ipv4_devconf *p) 1628 { 1629 int i; 1630 struct devinet_sysctl_table *t; 1631 1632 #define DEVINET_CTL_PATH_DEV 3 1633 1634 struct ctl_path devinet_ctl_path[] = { 1635 { .procname = "net", }, 1636 { .procname = "ipv4", }, 1637 { .procname = "conf", }, 1638 { /* to be set */ }, 1639 { }, 1640 }; 1641 1642 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); 1643 if (!t) 1644 goto out; 1645 1646 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { 1647 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; 1648 t->devinet_vars[i].extra1 = p; 1649 t->devinet_vars[i].extra2 = net; 1650 } 1651 1652 /* 1653 * Make a copy of dev_name, because '.procname' is regarded as const 1654 * by sysctl and we wouldn't want anyone to change it under our feet 1655 * (see SIOCSIFNAME). 1656 */ 1657 t->dev_name = kstrdup(dev_name, GFP_KERNEL); 1658 if (!t->dev_name) 1659 goto free; 1660 1661 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; 1662 1663 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, 1664 t->devinet_vars); 1665 if (!t->sysctl_header) 1666 goto free_procname; 1667 1668 p->sysctl = t; 1669 return 0; 1670 1671 free_procname: 1672 kfree(t->dev_name); 1673 free: 1674 kfree(t); 1675 out: 1676 return -ENOBUFS; 1677 } 1678 1679 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) 1680 { 1681 struct devinet_sysctl_table *t = cnf->sysctl; 1682 1683 if (t == NULL) 1684 return; 1685 1686 cnf->sysctl = NULL; 1687 unregister_net_sysctl_table(t->sysctl_header); 1688 kfree(t->dev_name); 1689 kfree(t); 1690 } 1691 1692 static void devinet_sysctl_register(struct in_device *idev) 1693 { 1694 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); 1695 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 1696 &idev->cnf); 1697 } 1698 1699 static void devinet_sysctl_unregister(struct in_device *idev) 1700 { 1701 __devinet_sysctl_unregister(&idev->cnf); 1702 neigh_sysctl_unregister(idev->arp_parms); 1703 } 1704 1705 static struct ctl_table ctl_forward_entry[] = { 1706 { 1707 .procname = "ip_forward", 1708 .data = &ipv4_devconf.data[ 1709 IPV4_DEVCONF_FORWARDING - 1], 1710 .maxlen = sizeof(int), 1711 .mode = 0644, 1712 .proc_handler = devinet_sysctl_forward, 1713 .extra1 = &ipv4_devconf, 1714 .extra2 = &init_net, 1715 }, 1716 { }, 1717 }; 1718 1719 static __net_initdata struct ctl_path net_ipv4_path[] = { 1720 { .procname = "net", }, 1721 { .procname = "ipv4", }, 1722 { }, 1723 }; 1724 #endif 1725 1726 static __net_init int devinet_init_net(struct net *net) 1727 { 1728 int err; 1729 struct ipv4_devconf *all, *dflt; 1730 #ifdef CONFIG_SYSCTL 1731 struct ctl_table *tbl = ctl_forward_entry; 1732 struct ctl_table_header *forw_hdr; 1733 #endif 1734 1735 err = -ENOMEM; 1736 all = &ipv4_devconf; 1737 dflt = &ipv4_devconf_dflt; 1738 1739 if (!net_eq(net, &init_net)) { 1740 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); 1741 if (all == NULL) 1742 goto err_alloc_all; 1743 1744 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); 1745 if (dflt == NULL) 1746 goto err_alloc_dflt; 1747 1748 #ifdef CONFIG_SYSCTL 1749 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); 1750 if (tbl == NULL) 1751 goto err_alloc_ctl; 1752 1753 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; 1754 tbl[0].extra1 = all; 1755 tbl[0].extra2 = net; 1756 #endif 1757 } 1758 1759 #ifdef CONFIG_SYSCTL 1760 err = __devinet_sysctl_register(net, "all", all); 1761 if (err < 0) 1762 goto err_reg_all; 1763 1764 err = __devinet_sysctl_register(net, "default", dflt); 1765 if (err < 0) 1766 goto err_reg_dflt; 1767 1768 err = -ENOMEM; 1769 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl); 1770 if (forw_hdr == NULL) 1771 goto err_reg_ctl; 1772 net->ipv4.forw_hdr = forw_hdr; 1773 #endif 1774 1775 net->ipv4.devconf_all = all; 1776 net->ipv4.devconf_dflt = dflt; 1777 return 0; 1778 1779 #ifdef CONFIG_SYSCTL 1780 err_reg_ctl: 1781 __devinet_sysctl_unregister(dflt); 1782 err_reg_dflt: 1783 __devinet_sysctl_unregister(all); 1784 err_reg_all: 1785 if (tbl != ctl_forward_entry) 1786 kfree(tbl); 1787 err_alloc_ctl: 1788 #endif 1789 if (dflt != &ipv4_devconf_dflt) 1790 kfree(dflt); 1791 err_alloc_dflt: 1792 if (all != &ipv4_devconf) 1793 kfree(all); 1794 err_alloc_all: 1795 return err; 1796 } 1797 1798 static __net_exit void devinet_exit_net(struct net *net) 1799 { 1800 #ifdef CONFIG_SYSCTL 1801 struct ctl_table *tbl; 1802 1803 tbl = net->ipv4.forw_hdr->ctl_table_arg; 1804 unregister_net_sysctl_table(net->ipv4.forw_hdr); 1805 __devinet_sysctl_unregister(net->ipv4.devconf_dflt); 1806 __devinet_sysctl_unregister(net->ipv4.devconf_all); 1807 kfree(tbl); 1808 #endif 1809 kfree(net->ipv4.devconf_dflt); 1810 kfree(net->ipv4.devconf_all); 1811 } 1812 1813 static __net_initdata struct pernet_operations devinet_ops = { 1814 .init = devinet_init_net, 1815 .exit = devinet_exit_net, 1816 }; 1817 1818 static struct rtnl_af_ops inet_af_ops = { 1819 .family = AF_INET, 1820 .fill_link_af = inet_fill_link_af, 1821 .get_link_af_size = inet_get_link_af_size, 1822 .validate_link_af = inet_validate_link_af, 1823 .set_link_af = inet_set_link_af, 1824 }; 1825 1826 void __init devinet_init(void) 1827 { 1828 int i; 1829 1830 for (i = 0; i < IN4_ADDR_HSIZE; i++) 1831 INIT_HLIST_HEAD(&inet_addr_lst[i]); 1832 1833 register_pernet_subsys(&devinet_ops); 1834 1835 register_gifconf(PF_INET, inet_gifconf); 1836 register_netdevice_notifier(&ip_netdev_notifier); 1837 1838 rtnl_af_register(&inet_af_ops); 1839 1840 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); 1841 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); 1842 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); 1843 } 1844 1845