1 /* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7 /* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/list.h> 14 #include <linux/rcupdate.h> 15 #include <linux/in6.h> 16 #include <linux/slab.h> 17 #include <net/addrconf.h> 18 #include <linux/if_addrlabel.h> 19 #include <linux/netlink.h> 20 #include <linux/rtnetlink.h> 21 #include <linux/refcount.h> 22 23 #if 0 24 #define ADDRLABEL(x...) printk(x) 25 #else 26 #define ADDRLABEL(x...) do { ; } while (0) 27 #endif 28 29 /* 30 * Policy Table 31 */ 32 struct ip6addrlbl_entry { 33 possible_net_t lbl_net; 34 struct in6_addr prefix; 35 int prefixlen; 36 int ifindex; 37 int addrtype; 38 u32 label; 39 struct hlist_node list; 40 refcount_t refcnt; 41 struct rcu_head rcu; 42 }; 43 44 static struct ip6addrlbl_table 45 { 46 struct hlist_head head; 47 spinlock_t lock; 48 u32 seq; 49 } ip6addrlbl_table; 50 51 static inline 52 struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 53 { 54 return read_pnet(&lbl->lbl_net); 55 } 56 57 /* 58 * Default policy table (RFC6724 + extensions) 59 * 60 * prefix addr_type label 61 * ------------------------------------------------------------------------- 62 * ::1/128 LOOPBACK 0 63 * ::/0 N/A 1 64 * 2002::/16 N/A 2 65 * ::/96 COMPATv4 3 66 * ::ffff:0:0/96 V4MAPPED 4 67 * fc00::/7 N/A 5 ULA (RFC 4193) 68 * 2001::/32 N/A 6 Teredo (RFC 4380) 69 * 2001:10::/28 N/A 7 ORCHID (RFC 4843) 70 * fec0::/10 N/A 11 Site-local 71 * (deprecated by RFC3879) 72 * 3ffe::/16 N/A 12 6bone 73 * 74 * Note: 0xffffffff is used if we do not have any policies. 75 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. 76 */ 77 78 #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 79 80 static const __net_initconst struct ip6addrlbl_init_table 81 { 82 const struct in6_addr *prefix; 83 int prefixlen; 84 u32 label; 85 } ip6addrlbl_init_table[] = { 86 { /* ::/0 */ 87 .prefix = &in6addr_any, 88 .label = 1, 89 }, { /* fc00::/7 */ 90 .prefix = &(struct in6_addr){ { { 0xfc } } } , 91 .prefixlen = 7, 92 .label = 5, 93 }, { /* fec0::/10 */ 94 .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } }, 95 .prefixlen = 10, 96 .label = 11, 97 }, { /* 2002::/16 */ 98 .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } }, 99 .prefixlen = 16, 100 .label = 2, 101 }, { /* 3ffe::/16 */ 102 .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } }, 103 .prefixlen = 16, 104 .label = 12, 105 }, { /* 2001::/32 */ 106 .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } }, 107 .prefixlen = 32, 108 .label = 6, 109 }, { /* 2001:10::/28 */ 110 .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } }, 111 .prefixlen = 28, 112 .label = 7, 113 }, { /* ::ffff:0:0 */ 114 .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } }, 115 .prefixlen = 96, 116 .label = 4, 117 }, { /* ::/96 */ 118 .prefix = &in6addr_any, 119 .prefixlen = 96, 120 .label = 3, 121 }, { /* ::1/128 */ 122 .prefix = &in6addr_loopback, 123 .prefixlen = 128, 124 .label = 0, 125 } 126 }; 127 128 /* Object management */ 129 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 130 { 131 kfree(p); 132 } 133 134 static void ip6addrlbl_free_rcu(struct rcu_head *h) 135 { 136 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 137 } 138 139 static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) 140 { 141 return refcount_inc_not_zero(&p->refcnt); 142 } 143 144 static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 145 { 146 if (refcount_dec_and_test(&p->refcnt)) 147 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 148 } 149 150 /* Find label */ 151 static bool __ip6addrlbl_match(struct net *net, 152 const struct ip6addrlbl_entry *p, 153 const struct in6_addr *addr, 154 int addrtype, int ifindex) 155 { 156 if (!net_eq(ip6addrlbl_net(p), net)) 157 return false; 158 if (p->ifindex && p->ifindex != ifindex) 159 return false; 160 if (p->addrtype && p->addrtype != addrtype) 161 return false; 162 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 163 return false; 164 return true; 165 } 166 167 static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, 168 const struct in6_addr *addr, 169 int type, int ifindex) 170 { 171 struct ip6addrlbl_entry *p; 172 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 173 if (__ip6addrlbl_match(net, p, addr, type, ifindex)) 174 return p; 175 } 176 return NULL; 177 } 178 179 u32 ipv6_addr_label(struct net *net, 180 const struct in6_addr *addr, int type, int ifindex) 181 { 182 u32 label; 183 struct ip6addrlbl_entry *p; 184 185 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 186 187 rcu_read_lock(); 188 p = __ipv6_addr_label(net, addr, type, ifindex); 189 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 190 rcu_read_unlock(); 191 192 ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", 193 __func__, addr, type, ifindex, label); 194 195 return label; 196 } 197 198 /* allocate one entry */ 199 static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, 200 const struct in6_addr *prefix, 201 int prefixlen, int ifindex, 202 u32 label) 203 { 204 struct ip6addrlbl_entry *newp; 205 int addrtype; 206 207 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", 208 __func__, prefix, prefixlen, ifindex, (unsigned int)label); 209 210 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 211 212 switch (addrtype) { 213 case IPV6_ADDR_MAPPED: 214 if (prefixlen > 96) 215 return ERR_PTR(-EINVAL); 216 if (prefixlen < 96) 217 addrtype = 0; 218 break; 219 case IPV6_ADDR_COMPATv4: 220 if (prefixlen != 96) 221 addrtype = 0; 222 break; 223 case IPV6_ADDR_LOOPBACK: 224 if (prefixlen != 128) 225 addrtype = 0; 226 break; 227 } 228 229 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 230 if (!newp) 231 return ERR_PTR(-ENOMEM); 232 233 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 234 newp->prefixlen = prefixlen; 235 newp->ifindex = ifindex; 236 newp->addrtype = addrtype; 237 newp->label = label; 238 INIT_HLIST_NODE(&newp->list); 239 write_pnet(&newp->lbl_net, net); 240 refcount_set(&newp->refcnt, 1); 241 return newp; 242 } 243 244 /* add a label */ 245 static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 246 { 247 struct hlist_node *n; 248 struct ip6addrlbl_entry *last = NULL, *p = NULL; 249 int ret = 0; 250 251 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, 252 replace); 253 254 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 255 if (p->prefixlen == newp->prefixlen && 256 net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && 257 p->ifindex == newp->ifindex && 258 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 259 if (!replace) { 260 ret = -EEXIST; 261 goto out; 262 } 263 hlist_replace_rcu(&p->list, &newp->list); 264 ip6addrlbl_put(p); 265 goto out; 266 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 267 (p->prefixlen < newp->prefixlen)) { 268 hlist_add_before_rcu(&newp->list, &p->list); 269 goto out; 270 } 271 last = p; 272 } 273 if (last) 274 hlist_add_behind_rcu(&newp->list, &last->list); 275 else 276 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 277 out: 278 if (!ret) 279 ip6addrlbl_table.seq++; 280 return ret; 281 } 282 283 /* add a label */ 284 static int ip6addrlbl_add(struct net *net, 285 const struct in6_addr *prefix, int prefixlen, 286 int ifindex, u32 label, int replace) 287 { 288 struct ip6addrlbl_entry *newp; 289 int ret = 0; 290 291 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 292 __func__, prefix, prefixlen, ifindex, (unsigned int)label, 293 replace); 294 295 newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); 296 if (IS_ERR(newp)) 297 return PTR_ERR(newp); 298 spin_lock(&ip6addrlbl_table.lock); 299 ret = __ip6addrlbl_add(newp, replace); 300 spin_unlock(&ip6addrlbl_table.lock); 301 if (ret) 302 ip6addrlbl_free(newp); 303 return ret; 304 } 305 306 /* remove a label */ 307 static int __ip6addrlbl_del(struct net *net, 308 const struct in6_addr *prefix, int prefixlen, 309 int ifindex) 310 { 311 struct ip6addrlbl_entry *p = NULL; 312 struct hlist_node *n; 313 int ret = -ESRCH; 314 315 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 316 __func__, prefix, prefixlen, ifindex); 317 318 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 319 if (p->prefixlen == prefixlen && 320 net_eq(ip6addrlbl_net(p), net) && 321 p->ifindex == ifindex && 322 ipv6_addr_equal(&p->prefix, prefix)) { 323 hlist_del_rcu(&p->list); 324 ip6addrlbl_put(p); 325 ret = 0; 326 break; 327 } 328 } 329 return ret; 330 } 331 332 static int ip6addrlbl_del(struct net *net, 333 const struct in6_addr *prefix, int prefixlen, 334 int ifindex) 335 { 336 struct in6_addr prefix_buf; 337 int ret; 338 339 ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", 340 __func__, prefix, prefixlen, ifindex); 341 342 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 343 spin_lock(&ip6addrlbl_table.lock); 344 ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); 345 spin_unlock(&ip6addrlbl_table.lock); 346 return ret; 347 } 348 349 /* add default label */ 350 static int __net_init ip6addrlbl_net_init(struct net *net) 351 { 352 int err = 0; 353 int i; 354 355 ADDRLABEL(KERN_DEBUG "%s\n", __func__); 356 357 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 358 int ret = ip6addrlbl_add(net, 359 ip6addrlbl_init_table[i].prefix, 360 ip6addrlbl_init_table[i].prefixlen, 361 0, 362 ip6addrlbl_init_table[i].label, 0); 363 /* XXX: should we free all rules when we catch an error? */ 364 if (ret && (!err || err != -ENOMEM)) 365 err = ret; 366 } 367 return err; 368 } 369 370 static void __net_exit ip6addrlbl_net_exit(struct net *net) 371 { 372 struct ip6addrlbl_entry *p = NULL; 373 struct hlist_node *n; 374 375 /* Remove all labels belonging to the exiting net */ 376 spin_lock(&ip6addrlbl_table.lock); 377 hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { 378 if (net_eq(ip6addrlbl_net(p), net)) { 379 hlist_del_rcu(&p->list); 380 ip6addrlbl_put(p); 381 } 382 } 383 spin_unlock(&ip6addrlbl_table.lock); 384 } 385 386 static struct pernet_operations ipv6_addr_label_ops = { 387 .init = ip6addrlbl_net_init, 388 .exit = ip6addrlbl_net_exit, 389 }; 390 391 int __init ipv6_addr_label_init(void) 392 { 393 spin_lock_init(&ip6addrlbl_table.lock); 394 395 return register_pernet_subsys(&ipv6_addr_label_ops); 396 } 397 398 void ipv6_addr_label_cleanup(void) 399 { 400 unregister_pernet_subsys(&ipv6_addr_label_ops); 401 } 402 403 static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 404 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, 405 [IFAL_LABEL] = { .len = sizeof(u32), }, 406 }; 407 408 static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, 409 struct netlink_ext_ack *extack) 410 { 411 struct net *net = sock_net(skb->sk); 412 struct ifaddrlblmsg *ifal; 413 struct nlattr *tb[IFAL_MAX+1]; 414 struct in6_addr *pfx; 415 u32 label; 416 int err = 0; 417 418 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, 419 extack); 420 if (err < 0) 421 return err; 422 423 ifal = nlmsg_data(nlh); 424 425 if (ifal->ifal_family != AF_INET6 || 426 ifal->ifal_prefixlen > 128) 427 return -EINVAL; 428 429 if (!tb[IFAL_ADDRESS]) 430 return -EINVAL; 431 pfx = nla_data(tb[IFAL_ADDRESS]); 432 433 if (!tb[IFAL_LABEL]) 434 return -EINVAL; 435 label = nla_get_u32(tb[IFAL_LABEL]); 436 if (label == IPV6_ADDR_LABEL_DEFAULT) 437 return -EINVAL; 438 439 switch (nlh->nlmsg_type) { 440 case RTM_NEWADDRLABEL: 441 if (ifal->ifal_index && 442 !__dev_get_by_index(net, ifal->ifal_index)) 443 return -EINVAL; 444 445 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 446 ifal->ifal_index, label, 447 nlh->nlmsg_flags & NLM_F_REPLACE); 448 break; 449 case RTM_DELADDRLABEL: 450 err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, 451 ifal->ifal_index); 452 break; 453 default: 454 err = -EOPNOTSUPP; 455 } 456 return err; 457 } 458 459 static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 460 int prefixlen, int ifindex, u32 lseq) 461 { 462 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 463 ifal->ifal_family = AF_INET6; 464 ifal->ifal_prefixlen = prefixlen; 465 ifal->ifal_flags = 0; 466 ifal->ifal_index = ifindex; 467 ifal->ifal_seq = lseq; 468 }; 469 470 static int ip6addrlbl_fill(struct sk_buff *skb, 471 struct ip6addrlbl_entry *p, 472 u32 lseq, 473 u32 portid, u32 seq, int event, 474 unsigned int flags) 475 { 476 struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, 477 sizeof(struct ifaddrlblmsg), flags); 478 if (!nlh) 479 return -EMSGSIZE; 480 481 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 482 483 if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 || 484 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 485 nlmsg_cancel(skb, nlh); 486 return -EMSGSIZE; 487 } 488 489 nlmsg_end(skb, nlh); 490 return 0; 491 } 492 493 static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 494 { 495 struct net *net = sock_net(skb->sk); 496 struct ip6addrlbl_entry *p; 497 int idx = 0, s_idx = cb->args[0]; 498 int err; 499 500 rcu_read_lock(); 501 hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { 502 if (idx >= s_idx && 503 net_eq(ip6addrlbl_net(p), net)) { 504 err = ip6addrlbl_fill(skb, p, 505 ip6addrlbl_table.seq, 506 NETLINK_CB(cb->skb).portid, 507 cb->nlh->nlmsg_seq, 508 RTM_NEWADDRLABEL, 509 NLM_F_MULTI); 510 if (err < 0) 511 break; 512 } 513 idx++; 514 } 515 rcu_read_unlock(); 516 cb->args[0] = idx; 517 return skb->len; 518 } 519 520 static inline int ip6addrlbl_msgsize(void) 521 { 522 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 523 + nla_total_size(16) /* IFAL_ADDRESS */ 524 + nla_total_size(4); /* IFAL_LABEL */ 525 } 526 527 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, 528 struct netlink_ext_ack *extack) 529 { 530 struct net *net = sock_net(in_skb->sk); 531 struct ifaddrlblmsg *ifal; 532 struct nlattr *tb[IFAL_MAX+1]; 533 struct in6_addr *addr; 534 u32 lseq; 535 int err = 0; 536 struct ip6addrlbl_entry *p; 537 struct sk_buff *skb; 538 539 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, 540 extack); 541 if (err < 0) 542 return err; 543 544 ifal = nlmsg_data(nlh); 545 546 if (ifal->ifal_family != AF_INET6 || 547 ifal->ifal_prefixlen != 128) 548 return -EINVAL; 549 550 if (ifal->ifal_index && 551 !__dev_get_by_index(net, ifal->ifal_index)) 552 return -EINVAL; 553 554 if (!tb[IFAL_ADDRESS]) 555 return -EINVAL; 556 addr = nla_data(tb[IFAL_ADDRESS]); 557 558 rcu_read_lock(); 559 p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); 560 if (p && !ip6addrlbl_hold(p)) 561 p = NULL; 562 lseq = ip6addrlbl_table.seq; 563 rcu_read_unlock(); 564 565 if (!p) { 566 err = -ESRCH; 567 goto out; 568 } 569 570 skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); 571 if (!skb) { 572 ip6addrlbl_put(p); 573 return -ENOBUFS; 574 } 575 576 err = ip6addrlbl_fill(skb, p, lseq, 577 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 578 RTM_NEWADDRLABEL, 0); 579 580 ip6addrlbl_put(p); 581 582 if (err < 0) { 583 WARN_ON(err == -EMSGSIZE); 584 kfree_skb(skb); 585 goto out; 586 } 587 588 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 589 out: 590 return err; 591 } 592 593 void __init ipv6_addr_label_rtnl_register(void) 594 { 595 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, 596 NULL, NULL); 597 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, 598 NULL, NULL); 599 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, 600 ip6addrlbl_dump, NULL); 601 } 602 603