1 /* 2 * IPv6 Address Label subsystem 3 * for the IPv6 "Default" Source Address Selection 4 * 5 * Copyright (C)2007 USAGI/WIDE Project 6 */ 7 /* 8 * Author: 9 * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> 10 */ 11 12 #include <linux/kernel.h> 13 #include <linux/list.h> 14 #include <linux/rcupdate.h> 15 #include <linux/in6.h> 16 #include <net/addrconf.h> 17 #include <linux/if_addrlabel.h> 18 #include <linux/netlink.h> 19 #include <linux/rtnetlink.h> 20 21 #if 0 22 #define ADDRLABEL(x...) printk(x) 23 #else 24 #define ADDRLABEL(x...) do { ; } while(0) 25 #endif 26 27 /* 28 * Policy Table 29 */ 30 struct ip6addrlbl_entry 31 { 32 struct in6_addr prefix; 33 int prefixlen; 34 int ifindex; 35 int addrtype; 36 u32 label; 37 struct hlist_node list; 38 atomic_t refcnt; 39 struct rcu_head rcu; 40 }; 41 42 static struct ip6addrlbl_table 43 { 44 struct hlist_head head; 45 spinlock_t lock; 46 u32 seq; 47 } ip6addrlbl_table; 48 49 /* 50 * Default policy table (RFC3484 + extensions) 51 * 52 * prefix addr_type label 53 * ------------------------------------------------------------------------- 54 * ::1/128 LOOPBACK 0 55 * ::/0 N/A 1 56 * 2002::/16 N/A 2 57 * ::/96 COMPATv4 3 58 * ::ffff:0:0/96 V4MAPPED 4 59 * fc00::/7 N/A 5 ULA (RFC 4193) 60 * 2001::/32 N/A 6 Teredo (RFC 4380) 61 * 62 * Note: 0xffffffff is used if we do not have any policies. 63 */ 64 65 #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL 66 67 static const __initdata struct ip6addrlbl_init_table 68 { 69 const struct in6_addr *prefix; 70 int prefixlen; 71 u32 label; 72 } ip6addrlbl_init_table[] = { 73 { /* ::/0 */ 74 .prefix = &in6addr_any, 75 .label = 1, 76 },{ /* fc00::/7 */ 77 .prefix = &(struct in6_addr){{{ 0xfc }}}, 78 .prefixlen = 7, 79 .label = 5, 80 },{ /* 2002::/16 */ 81 .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, 82 .prefixlen = 16, 83 .label = 2, 84 },{ /* 2001::/32 */ 85 .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, 86 .prefixlen = 32, 87 .label = 6, 88 },{ /* ::ffff:0:0 */ 89 .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, 90 .prefixlen = 96, 91 .label = 4, 92 },{ /* ::/96 */ 93 .prefix = &in6addr_any, 94 .prefixlen = 96, 95 .label = 3, 96 },{ /* ::1/128 */ 97 .prefix = &in6addr_loopback, 98 .prefixlen = 128, 99 .label = 0, 100 } 101 }; 102 103 /* Object management */ 104 static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) 105 { 106 kfree(p); 107 } 108 109 static void ip6addrlbl_free_rcu(struct rcu_head *h) 110 { 111 ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); 112 } 113 114 static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p) 115 { 116 return atomic_inc_not_zero(&p->refcnt); 117 } 118 119 static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) 120 { 121 if (atomic_dec_and_test(&p->refcnt)) 122 call_rcu(&p->rcu, ip6addrlbl_free_rcu); 123 } 124 125 /* Find label */ 126 static int __ip6addrlbl_match(struct ip6addrlbl_entry *p, 127 const struct in6_addr *addr, 128 int addrtype, int ifindex) 129 { 130 if (p->ifindex && p->ifindex != ifindex) 131 return 0; 132 if (p->addrtype && p->addrtype != addrtype) 133 return 0; 134 if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) 135 return 0; 136 return 1; 137 } 138 139 static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr, 140 int type, int ifindex) 141 { 142 struct hlist_node *pos; 143 struct ip6addrlbl_entry *p; 144 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { 145 if (__ip6addrlbl_match(p, addr, type, ifindex)) 146 return p; 147 } 148 return NULL; 149 } 150 151 u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) 152 { 153 u32 label; 154 struct ip6addrlbl_entry *p; 155 156 type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; 157 158 rcu_read_lock(); 159 p = __ipv6_addr_label(addr, type, ifindex); 160 label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; 161 rcu_read_unlock(); 162 163 ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n", 164 __FUNCTION__, 165 NIP6(*addr), type, ifindex, 166 label); 167 168 return label; 169 } 170 171 /* allocate one entry */ 172 static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, 173 int prefixlen, int ifindex, 174 u32 label) 175 { 176 struct ip6addrlbl_entry *newp; 177 int addrtype; 178 179 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n", 180 __FUNCTION__, 181 NIP6(*prefix), prefixlen, 182 ifindex, 183 (unsigned int)label); 184 185 addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); 186 187 switch (addrtype) { 188 case IPV6_ADDR_MAPPED: 189 if (prefixlen > 96) 190 return ERR_PTR(-EINVAL); 191 if (prefixlen < 96) 192 addrtype = 0; 193 break; 194 case IPV6_ADDR_COMPATv4: 195 if (prefixlen != 96) 196 addrtype = 0; 197 break; 198 case IPV6_ADDR_LOOPBACK: 199 if (prefixlen != 128) 200 addrtype = 0; 201 break; 202 } 203 204 newp = kmalloc(sizeof(*newp), GFP_KERNEL); 205 if (!newp) 206 return ERR_PTR(-ENOMEM); 207 208 ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); 209 newp->prefixlen = prefixlen; 210 newp->ifindex = ifindex; 211 newp->addrtype = addrtype; 212 newp->label = label; 213 INIT_HLIST_NODE(&newp->list); 214 atomic_set(&newp->refcnt, 1); 215 return newp; 216 } 217 218 /* add a label */ 219 static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) 220 { 221 int ret = 0; 222 223 ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", 224 __FUNCTION__, 225 newp, replace); 226 227 if (hlist_empty(&ip6addrlbl_table.head)) { 228 hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); 229 } else { 230 struct hlist_node *pos, *n; 231 struct ip6addrlbl_entry *p = NULL; 232 hlist_for_each_entry_safe(p, pos, n, 233 &ip6addrlbl_table.head, list) { 234 if (p->prefixlen == newp->prefixlen && 235 p->ifindex == newp->ifindex && 236 ipv6_addr_equal(&p->prefix, &newp->prefix)) { 237 if (!replace) { 238 ret = -EEXIST; 239 goto out; 240 } 241 hlist_replace_rcu(&p->list, &newp->list); 242 ip6addrlbl_put(p); 243 goto out; 244 } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || 245 (p->prefixlen < newp->prefixlen)) { 246 hlist_add_before_rcu(&newp->list, &p->list); 247 goto out; 248 } 249 } 250 hlist_add_after_rcu(&p->list, &newp->list); 251 } 252 out: 253 if (!ret) 254 ip6addrlbl_table.seq++; 255 return ret; 256 } 257 258 /* add a label */ 259 static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, 260 int ifindex, u32 label, int replace) 261 { 262 struct ip6addrlbl_entry *newp; 263 int ret = 0; 264 265 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", 266 __FUNCTION__, 267 NIP6(*prefix), prefixlen, 268 ifindex, 269 (unsigned int)label, 270 replace); 271 272 newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); 273 if (IS_ERR(newp)) 274 return PTR_ERR(newp); 275 spin_lock(&ip6addrlbl_table.lock); 276 ret = __ip6addrlbl_add(newp, replace); 277 spin_unlock(&ip6addrlbl_table.lock); 278 if (ret) 279 ip6addrlbl_free(newp); 280 return ret; 281 } 282 283 /* remove a label */ 284 static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, 285 int ifindex) 286 { 287 struct ip6addrlbl_entry *p = NULL; 288 struct hlist_node *pos, *n; 289 int ret = -ESRCH; 290 291 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", 292 __FUNCTION__, 293 NIP6(*prefix), prefixlen, 294 ifindex); 295 296 hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { 297 if (p->prefixlen == prefixlen && 298 p->ifindex == ifindex && 299 ipv6_addr_equal(&p->prefix, prefix)) { 300 hlist_del_rcu(&p->list); 301 ip6addrlbl_put(p); 302 ret = 0; 303 break; 304 } 305 } 306 return ret; 307 } 308 309 static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, 310 int ifindex) 311 { 312 struct in6_addr prefix_buf; 313 int ret; 314 315 ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", 316 __FUNCTION__, 317 NIP6(*prefix), prefixlen, 318 ifindex); 319 320 ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); 321 spin_lock(&ip6addrlbl_table.lock); 322 ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex); 323 spin_unlock(&ip6addrlbl_table.lock); 324 return ret; 325 } 326 327 /* add default label */ 328 static __init int ip6addrlbl_init(void) 329 { 330 int err = 0; 331 int i; 332 333 ADDRLABEL(KERN_DEBUG "%s()\n", __FUNCTION__); 334 335 for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { 336 int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix, 337 ip6addrlbl_init_table[i].prefixlen, 338 0, 339 ip6addrlbl_init_table[i].label, 0); 340 /* XXX: should we free all rules when we catch an error? */ 341 if (ret && (!err || err != -ENOMEM)) 342 err = ret; 343 } 344 return err; 345 } 346 347 int __init ipv6_addr_label_init(void) 348 { 349 spin_lock_init(&ip6addrlbl_table.lock); 350 351 return ip6addrlbl_init(); 352 } 353 354 static const struct nla_policy ifal_policy[IFAL_MAX+1] = { 355 [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, 356 [IFAL_LABEL] = { .len = sizeof(u32), }, 357 }; 358 359 static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, 360 void *arg) 361 { 362 struct net *net = skb->sk->sk_net; 363 struct ifaddrlblmsg *ifal; 364 struct nlattr *tb[IFAL_MAX+1]; 365 struct in6_addr *pfx; 366 u32 label; 367 int err = 0; 368 369 if (net != &init_net) 370 return 0; 371 372 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 373 if (err < 0) 374 return err; 375 376 ifal = nlmsg_data(nlh); 377 378 if (ifal->ifal_family != AF_INET6 || 379 ifal->ifal_prefixlen > 128) 380 return -EINVAL; 381 382 if (ifal->ifal_index && 383 !__dev_get_by_index(&init_net, ifal->ifal_index)) 384 return -EINVAL; 385 386 if (!tb[IFAL_ADDRESS]) 387 return -EINVAL; 388 389 pfx = nla_data(tb[IFAL_ADDRESS]); 390 if (!pfx) 391 return -EINVAL; 392 393 if (!tb[IFAL_LABEL]) 394 return -EINVAL; 395 label = nla_get_u32(tb[IFAL_LABEL]); 396 if (label == IPV6_ADDR_LABEL_DEFAULT) 397 return -EINVAL; 398 399 switch(nlh->nlmsg_type) { 400 case RTM_NEWADDRLABEL: 401 err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen, 402 ifal->ifal_index, label, 403 nlh->nlmsg_flags & NLM_F_REPLACE); 404 break; 405 case RTM_DELADDRLABEL: 406 err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen, 407 ifal->ifal_index); 408 break; 409 default: 410 err = -EOPNOTSUPP; 411 } 412 return err; 413 } 414 415 static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh, 416 int prefixlen, int ifindex, u32 lseq) 417 { 418 struct ifaddrlblmsg *ifal = nlmsg_data(nlh); 419 ifal->ifal_family = AF_INET6; 420 ifal->ifal_prefixlen = prefixlen; 421 ifal->ifal_flags = 0; 422 ifal->ifal_index = ifindex; 423 ifal->ifal_seq = lseq; 424 }; 425 426 static int ip6addrlbl_fill(struct sk_buff *skb, 427 struct ip6addrlbl_entry *p, 428 u32 lseq, 429 u32 pid, u32 seq, int event, 430 unsigned int flags) 431 { 432 struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, 433 sizeof(struct ifaddrlblmsg), flags); 434 if (!nlh) 435 return -EMSGSIZE; 436 437 ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); 438 439 if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || 440 nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { 441 nlmsg_cancel(skb, nlh); 442 return -EMSGSIZE; 443 } 444 445 return nlmsg_end(skb, nlh); 446 } 447 448 static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) 449 { 450 struct net *net = skb->sk->sk_net; 451 struct ip6addrlbl_entry *p; 452 struct hlist_node *pos; 453 int idx = 0, s_idx = cb->args[0]; 454 int err; 455 456 if (net != &init_net) 457 return 0; 458 459 rcu_read_lock(); 460 hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { 461 if (idx >= s_idx) { 462 if ((err = ip6addrlbl_fill(skb, p, 463 ip6addrlbl_table.seq, 464 NETLINK_CB(cb->skb).pid, 465 cb->nlh->nlmsg_seq, 466 RTM_NEWADDRLABEL, 467 NLM_F_MULTI)) <= 0) 468 break; 469 } 470 idx++; 471 } 472 rcu_read_unlock(); 473 cb->args[0] = idx; 474 return skb->len; 475 } 476 477 static inline int ip6addrlbl_msgsize(void) 478 { 479 return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 480 + nla_total_size(16) /* IFAL_ADDRESS */ 481 + nla_total_size(4) /* IFAL_LABEL */ 482 ); 483 } 484 485 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, 486 void *arg) 487 { 488 struct net *net = in_skb->sk->sk_net; 489 struct ifaddrlblmsg *ifal; 490 struct nlattr *tb[IFAL_MAX+1]; 491 struct in6_addr *addr; 492 u32 lseq; 493 int err = 0; 494 struct ip6addrlbl_entry *p; 495 struct sk_buff *skb; 496 497 if (net != &init_net) 498 return 0; 499 500 err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); 501 if (err < 0) 502 return err; 503 504 ifal = nlmsg_data(nlh); 505 506 if (ifal->ifal_family != AF_INET6 || 507 ifal->ifal_prefixlen != 128) 508 return -EINVAL; 509 510 if (ifal->ifal_index && 511 !__dev_get_by_index(&init_net, ifal->ifal_index)) 512 return -EINVAL; 513 514 if (!tb[IFAL_ADDRESS]) 515 return -EINVAL; 516 517 addr = nla_data(tb[IFAL_ADDRESS]); 518 if (!addr) 519 return -EINVAL; 520 521 rcu_read_lock(); 522 p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index); 523 if (p && ip6addrlbl_hold(p)) 524 p = NULL; 525 lseq = ip6addrlbl_table.seq; 526 rcu_read_unlock(); 527 528 if (!p) { 529 err = -ESRCH; 530 goto out; 531 } 532 533 if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { 534 ip6addrlbl_put(p); 535 return -ENOBUFS; 536 } 537 538 err = ip6addrlbl_fill(skb, p, lseq, 539 NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 540 RTM_NEWADDRLABEL, 0); 541 542 ip6addrlbl_put(p); 543 544 if (err < 0) { 545 WARN_ON(err == -EMSGSIZE); 546 kfree_skb(skb); 547 goto out; 548 } 549 550 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); 551 out: 552 return err; 553 } 554 555 void __init ipv6_addr_label_rtnl_register(void) 556 { 557 __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL); 558 __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL); 559 __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump); 560 } 561 562