/*
 *	NET3	IP device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the IP parts of dev.c 1.0.19
 *	Authors:	Ross Biro
 *			Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *			Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	Changes:
 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
 *					lists.
 *		Cyrus Durgin:		updated for kmod
 *		Matthias Andree:	in devinet_ioctl, compare label and
 *					address (4.4BSD alias style support),
 *					fall back to comparing just the label
 *					if no match found.
 */


#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/slab.h>
#include <linux/hash.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/kmod.h>

#include <net/arp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>

static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};

static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};

#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)

static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};

/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
 * value.  So if you change this define, make appropriate changes to
 * inet_addr_hash as well.
 */
#define IN4_ADDR_HSIZE	256
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);

static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
{
	u32 val = (__force u32) addr ^ hash_ptr(net, 8);

	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
		(IN4_ADDR_HSIZE - 1));
}

static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
{
	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);

	spin_lock(&inet_addr_hash_lock);
	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
	spin_unlock(&inet_addr_hash_lock);
}

static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}

/**
 * __ip_dev_find - find the first device with a given source address.
 * @net: the net namespace
 * @addr: the source address
 * @devref: if true, take a reference on the found device
 *
 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 */
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
	unsigned int hash = inet_addr_hash(net, addr);
	struct net_device *result = NULL;
	struct in_ifaddr *ifa;
	struct hlist_node *node;

	rcu_read_lock();
	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
		struct net_device *dev = ifa->ifa_dev->dev;

		if (!net_eq(dev_net(dev), net))
			continue;
		if (ifa->ifa_local == addr) {
			result = dev;
			break;
		}
	}
	if (result && devref)
		dev_hold(result);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(__ip_dev_find);

static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif

/* Locks all the inet devices. */

static struct in_ifaddr *inet_alloc_ifa(void)
{
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}

static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}

static inline void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}

void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
#ifdef NET_REFCNT_DEBUG
	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
	       idev, dev ? dev->name : "NIL");
#endif
	dev_put(dev);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
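
/* Allocate the per-device IPv4 state for @dev and publish it via
 * dev->ip_ptr.  Runs under RTNL; the new in_device starts from the
 * namespace's "default" devconf values.
 */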
static struct in_device *inetdev_init(struct net_device *dev)
{
	struct in_device *in_dev;

	ASSERT_RTNL();

	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
			sizeof(in_dev->cnf));
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
		goto out_kfree;
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
	/* Reference in_dev->dev */
	dev_hold(dev);
	/* Account for reference dev->ip_ptr (below) */
	in_dev_hold(in_dev);

	devinet_sysctl_register(in_dev);
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);

	/* we can receive as soon as ip_ptr is set -- do this last */
	rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
	return in_dev;
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}

static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}

static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	rcu_assign_pointer(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}

int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
	rcu_read_lock();
	for_primary_ifa(in_dev) {
		if (inet_ifa_match(a, ifa)) {
			if (!b || inet_ifa_match(b, ifa)) {
				rcu_read_unlock();
				return 1;
			}
		}
	} endfor_ifa(in_dev);
	rcu_read_unlock();
	return 0;
}
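
/* Unlink *ifap from in_dev's address list and announce the deletion.
 * Deleting a primary address also deletes its secondaries, unless
 * promote_secondaries is enabled, in which case one secondary is
 * promoted to primary and the routes it covers are re-added.
 */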
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all secondaries,
	 *    unless alias promotion is set.
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send the message first, then call the notifier.
	   At first sight, the FIB update triggered by the notifier
	   will refer to the already deleted ifaddr, which could confuse
	   netlink listeners.  It is not true: look, gated sees
	   that the route was deleted and, if it still thinks the ifaddr
	   is valid, it will try to restore the deleted routes... Grr.
	   So, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {

		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
				continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}

static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}

static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 pid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		net_srandom(ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Send the message first, then call the notifier.
	   The notifier will trigger a FIB update, so that
	   netlink listeners will learn about the new ifaddr. */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}

static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
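
/* Bind a freshly built ifaddr to @dev's in_device and insert it into
 * the device's address list.  Loopback addresses are forced to host
 * scope.
 */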
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		inet_free_ifa(ifa);
		return -ENOBUFS;
	}
	ipv4_devconf_setall(in_dev);
	if (ifa->ifa_dev != in_dev) {
		WARN_ON(ifa->ifa_dev);
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
	if (ipv4_is_loopback(ifa->ifa_local))
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}

/* Caller must hold RCU or RTNL:
 * we don't take a reference on the found in_device
 */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	rcu_read_unlock();
	return in_dev;
}
EXPORT_SYMBOL(inetdev_by_index);

/* Called only from RTNL semaphored context. No locks. */

struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
				    __be32 mask)
{
	ASSERT_RTNL();

	for_primary_ifa(in_dev) {
		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
			return ifa;
	} endfor_ifa(in_dev);
	return NULL;
}

static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFA_MAX+1];
	struct in_device *in_dev;
	struct ifaddrmsg *ifm;
	struct in_ifaddr *ifa, **ifap;
	int err = -EINVAL;

	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	in_dev = inetdev_by_index(net, ifm->ifa_index);
	if (in_dev == NULL) {
		err = -ENODEV;
		goto errout;
	}

	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
	     ifap = &ifa->ifa_next) {
		if (tb[IFA_LOCAL] &&
		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
			continue;

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
			continue;

		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
		return 0;
	}

	err = -EADDRNOTAVAIL;
errout:
	return err;
}
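
/* Build an in_ifaddr from an RTM_NEWADDR request.  IFA_LOCAL is
 * mandatory; IFA_ADDRESS defaults to IFA_LOCAL and the label defaults
 * to the device name.  Returns an ERR_PTR() on failure.
 */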
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (dev == NULL)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (in_dev == NULL)
		goto errout;

	ifa = inet_alloc_ifa();
	if (ifa == NULL)
		/*
		 * A potential in_dev allocation can be left alive; it stays
		 * assigned to its device and is destroyed together with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	in_dev_hold(in_dev);

	if (tb[IFA_ADDRESS] == NULL)
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	ifa->ifa_flags = ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);

	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	return ifa;

errout:
	return ERR_PTR(err);
}

static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr *ifa;

	ASSERT_RTNL();

	ifa = rtm_to_ifaddr(net, nlh);
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
}

/*
 *	Determine a default network mask, based on the IP address.
 */

static inline int inet_abc_len(__be32 addr)
{
	int rc = -1;	/* Something else, probably a multicast. */

	if (ipv4_is_zeronet(addr))
		rc = 0;
	else {
		__u32 haddr = ntohl(addr);

		if (IN_CLASSA(haddr))
			rc = 8;
		else if (IN_CLASSB(haddr))
			rc = 16;
		else if (IN_CLASSC(haddr))
			rc = 24;
	}

	return rc;
}
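
/* Handle the classic SIOCGIFxxx / SIOCSIFxxx interface address ioctls,
 * plus SIOCSIFFLAGS.  Addresses are matched by label and, for
 * 4.4BSD-style callers, by label and address; all "set" operations
 * require CAP_NET_ADMIN.
 */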
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK:	/* Set the netmask for the interface */
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK:	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}

static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct in_ifaddr *ifa;
	struct ifreq ifr;
	int done = 0;

	if (!in_dev)
		goto out;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		if (!buf) {
			done += sizeof(ifr);
			continue;
		}
		if (len < (int) sizeof(ifr))
			break;
		memset(&ifr, 0, sizeof(struct ifreq));
		if (ifa->ifa_label)
			strcpy(ifr.ifr_name, ifa->ifa_label);
		else
			strcpy(ifr.ifr_name, dev->name);

		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
								ifa->ifa_local;

		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
			done = -EFAULT;
			break;
		}
		buf  += sizeof(struct ifreq);
		len  -= sizeof(struct ifreq);
		done += sizeof(struct ifreq);
	}
out:
	return done;
}
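
/* Select a source address on @dev for talking to @dst: the first
 * primary address with ifa_scope <= @scope on the same subnet as @dst,
 * or failing that the first in-scope primary address.  If @dev has
 * none, fall back to scanning every device in the namespace for an
 * address that fits @scope and is not link-scoped.
 */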
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		if (!addr)
			addr = ifa->ifa_local;
	} endfor_ifa(in_dev);

	if (addr)
		goto out_unlock;
no_in_dev:

	/* Non-loopback addresses on the loopback device should be
	   preferred in this case.  It is important that lo is the
	   first interface in the dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope != RT_SCOPE_LINK &&
			    ifa->ifa_scope <= scope) {
				addr = ifa->ifa_local;
				goto out_unlock;
			}
		} endfor_ifa(in_dev);
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);

static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
				 __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected address in the dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use the new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for a larger dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}

/*
 * Confirm that local IP address exists using wildcards:
 * - in_dev: only on this interface, 0=any interface
 * - dst: only in the same subnet as dst, 0=any dst
 * - local: address, 0=autoselect the local address
 * - scope: maximum allowed scope value for the local address
 */
__be32 inet_confirm_addr(struct in_device *in_dev,
			 __be32 dst, __be32 local, int scope)
{
	__be32 addr = 0;
	struct net_device *dev;
	struct net *net;

	if (scope != RT_SCOPE_LINK)
		return confirm_addr_indev(in_dev, dst, local, scope);

	net = dev_net(in_dev->dev);
	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev) {
			addr = confirm_addr_indev(in_dev, dst, local, scope);
			if (addr)
				break;
		}
	}
	rcu_read_unlock();

	return addr;
}

/*
 *	Device notifier
 */

int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);

int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);

/* Rename ifa_labels for a device name change.  Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
 */
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		if (named++ == 0)
			goto skip;
		dot = strchr(old, ':');
		if (dot == NULL) {
			sprintf(old, ":%d", named);
			dot = old;
		}
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}

static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return mtu >= 68;
}

static void inetdev_send_gratuitous_arp(struct net_device *dev,
					struct in_device *in_dev)
{
	struct in_ifaddr *ifa = in_dev->ifa_list;

	if (!ifa)
		return;

	arp_send(ARPOP_REQUEST, ETH_P_ARP,
		 ifa->ifa_local, dev,
		 ifa->ifa_local, NULL,
		 dev->dev_addr, NULL);
}

/* Called only under RTNL semaphore */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		printk(KERN_DEBUG "inetdev_event: bug\n");
		rcu_assign_pointer(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}

static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};

static inline size_t inet_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
	       + nla_total_size(4) /* IFA_ADDRESS */
	       + nla_total_size(4) /* IFA_LOCAL */
	       + nla_total_size(4) /* IFA_BROADCAST */
	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
}
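
/* Fill one ifaddr netlink message (@event is RTM_NEWADDR or RTM_DELADDR)
 * for @ifa.  Returns -EMSGSIZE if the message does not fit in @skb.
 */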
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 pid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if (ifa->ifa_address)
		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);

	if (ifa->ifa_local)
		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);

	if (ifa->ifa_broadcast)
		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);

	if (ifa->ifa_label[0])
		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;
	struct hlist_node *node;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
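
/* Notify netlink listeners (the RTNLGRP_IPV4_IFADDR group) about an
 * address change; @nlh and @pid identify the netlink request that
 * triggered it, if any.
 */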
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 pid)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;
	struct net *net;

	net = dev_net(ifa->ifa_dev->dev);
	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}

static size_t inet_get_link_af_size(const struct net_device *dev)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	if (!in_dev)
		return 0;

	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
}

static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct nlattr *nla;
	int i;

	if (!in_dev)
		return -ENODATA;

	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
	if (nla == NULL)
		return -EMSGSIZE;

	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];

	return 0;
}

static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};

static int inet_validate_link_af(const struct net_device *dev,
				 const struct nlattr *nla)
{
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int err, rem;

	if (dev && !__in_dev_get_rtnl(dev))
		return -EAFNOSUPPORT;

	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
	if (err < 0)
		return err;

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
			int cfgid = nla_type(a);

			if (nla_len(a) < 4)
				return -EINVAL;

			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
				return -EINVAL;
		}
	}

	return 0;
}

static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int rem;

	if (!in_dev)
		return -EAFNOSUPPORT;

	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
		BUG();

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
	}

	return 0;
}

#ifdef CONFIG_SYSCTL

static void devinet_copy_dflt_conf(struct net *net, int i)
{
	struct net_device *dev;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct in_device *in_dev;

		in_dev = __in_dev_get_rcu(dev);
		if (in_dev && !test_bit(i, in_dev->cnf.state))
			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
	}
	rcu_read_unlock();
}

/* called with RTNL locked */
static void inet_forward_change(struct net *net)
{
	struct net_device *dev;
	int on = IPV4_DEVCONF_ALL(net, FORWARDING);

	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;

	for_each_netdev(net, dev) {
		struct in_device *in_dev;
		if (on)
			dev_disable_lro(dev);
		rcu_read_lock();
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev)
			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
		rcu_read_unlock();
	}
}
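
/* proc handler shared by most per-device conf entries: write the value
 * and record in cnf->state that this entry was set explicitly, so a
 * later change of the namespace default does not overwrite it (see
 * devinet_copy_dflt_conf()).
 */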
static int devinet_conf_proc(ctl_table *ctl, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		int i = (int *)ctl->data - cnf->data;

		set_bit(i, cnf->state);

		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
	}

	return ret;
}

static int devinet_sysctl_forward(ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else if (*valp) {
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				dev_disable_lro(idev->dev);
			}
			rtnl_unlock();
			rt_cache_flush(net, 0);
		}
	}

	return ret;
}

static int ipv4_doint_and_flush(ctl_table *ctl, int write,
				void __user *buffer,
				size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	struct net *net = ctl->extra2;

	if (write && *valp != val)
		rt_cache_flush(net, 0);

	return ret;
}

#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)

static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
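
/* Register the devconf table for one device (or "all"/"default") under
 * /proc/sys/net/ipv4/conf/<dev_name>/.
 */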
1624 */ 1625 t->dev_name = kstrdup(dev_name, GFP_KERNEL); 1626 if (!t->dev_name) 1627 goto free; 1628 1629 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; 1630 1631 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, 1632 t->devinet_vars); 1633 if (!t->sysctl_header) 1634 goto free_procname; 1635 1636 p->sysctl = t; 1637 return 0; 1638 1639 free_procname: 1640 kfree(t->dev_name); 1641 free: 1642 kfree(t); 1643 out: 1644 return -ENOBUFS; 1645 } 1646 1647 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) 1648 { 1649 struct devinet_sysctl_table *t = cnf->sysctl; 1650 1651 if (t == NULL) 1652 return; 1653 1654 cnf->sysctl = NULL; 1655 unregister_sysctl_table(t->sysctl_header); 1656 kfree(t->dev_name); 1657 kfree(t); 1658 } 1659 1660 static void devinet_sysctl_register(struct in_device *idev) 1661 { 1662 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL); 1663 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, 1664 &idev->cnf); 1665 } 1666 1667 static void devinet_sysctl_unregister(struct in_device *idev) 1668 { 1669 __devinet_sysctl_unregister(&idev->cnf); 1670 neigh_sysctl_unregister(idev->arp_parms); 1671 } 1672 1673 static struct ctl_table ctl_forward_entry[] = { 1674 { 1675 .procname = "ip_forward", 1676 .data = &ipv4_devconf.data[ 1677 IPV4_DEVCONF_FORWARDING - 1], 1678 .maxlen = sizeof(int), 1679 .mode = 0644, 1680 .proc_handler = devinet_sysctl_forward, 1681 .extra1 = &ipv4_devconf, 1682 .extra2 = &init_net, 1683 }, 1684 { }, 1685 }; 1686 1687 static __net_initdata struct ctl_path net_ipv4_path[] = { 1688 { .procname = "net", }, 1689 { .procname = "ipv4", }, 1690 { }, 1691 }; 1692 #endif 1693 1694 static __net_init int devinet_init_net(struct net *net) 1695 { 1696 int err; 1697 struct ipv4_devconf *all, *dflt; 1698 #ifdef CONFIG_SYSCTL 1699 struct ctl_table *tbl = ctl_forward_entry; 1700 struct ctl_table_header *forw_hdr; 1701 #endif 1702 1703 err = -ENOMEM; 1704 all = &ipv4_devconf; 1705 dflt = &ipv4_devconf_dflt; 1706 1707 if (!net_eq(net, &init_net)) { 1708 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); 1709 if (all == NULL) 1710 goto err_alloc_all; 1711 1712 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); 1713 if (dflt == NULL) 1714 goto err_alloc_dflt; 1715 1716 #ifdef CONFIG_SYSCTL 1717 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); 1718 if (tbl == NULL) 1719 goto err_alloc_ctl; 1720 1721 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; 1722 tbl[0].extra1 = all; 1723 tbl[0].extra2 = net; 1724 #endif 1725 } 1726 1727 #ifdef CONFIG_SYSCTL 1728 err = __devinet_sysctl_register(net, "all", all); 1729 if (err < 0) 1730 goto err_reg_all; 1731 1732 err = __devinet_sysctl_register(net, "default", dflt); 1733 if (err < 0) 1734 goto err_reg_dflt; 1735 1736 err = -ENOMEM; 1737 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl); 1738 if (forw_hdr == NULL) 1739 goto err_reg_ctl; 1740 net->ipv4.forw_hdr = forw_hdr; 1741 #endif 1742 1743 net->ipv4.devconf_all = all; 1744 net->ipv4.devconf_dflt = dflt; 1745 return 0; 1746 1747 #ifdef CONFIG_SYSCTL 1748 err_reg_ctl: 1749 __devinet_sysctl_unregister(dflt); 1750 err_reg_dflt: 1751 __devinet_sysctl_unregister(all); 1752 err_reg_all: 1753 if (tbl != ctl_forward_entry) 1754 kfree(tbl); 1755 err_alloc_ctl: 1756 #endif 1757 if (dflt != &ipv4_devconf_dflt) 1758 kfree(dflt); 1759 err_alloc_dflt: 1760 if (all != &ipv4_devconf) 1761 kfree(all); 1762 err_alloc_all: 1763 return err; 1764 } 1765 1766 static __net_exit void 

static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
	__devinet_sysctl_unregister(net->ipv4.devconf_all);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}

static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};

static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};

void __init devinet_init(void)
{
	int i;

	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	rtnl_af_register(&inet_af_ops);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}