/*
 * net/core/fib_rules.c		Generic Routing Rules
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, version 2.
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_rules.h>

static LIST_HEAD(rules_ops);
static DEFINE_SPINLOCK(rules_mod_lock);

int fib_default_rule_add(struct fib_rules_ops *ops,
			 u32 pref, u32 table, u32 flags)
{
	struct fib_rule *r;

	r = kzalloc(ops->rule_size, GFP_KERNEL);
	if (r == NULL)
		return -ENOMEM;

	atomic_set(&r->refcnt, 1);
	r->action = FR_ACT_TO_TBL;
	r->pref = pref;
	r->table = table;
	r->flags = flags;

	/* The lock is not required here, the list is unreachable
	 * at the moment this function is called */
	list_add_tail(&r->list, &ops->rules_list);
	return 0;
}
EXPORT_SYMBOL(fib_default_rule_add);

static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid);

static struct fib_rules_ops *lookup_rules_ops(int family)
{
	struct fib_rules_ops *ops;

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &rules_ops, list) {
		if (ops->family == family) {
			if (!try_module_get(ops->owner))
				ops = NULL;
			rcu_read_unlock();
			return ops;
		}
	}
	rcu_read_unlock();

	return NULL;
}

static void rules_ops_put(struct fib_rules_ops *ops)
{
	if (ops)
		module_put(ops->owner);
}

static void flush_route_cache(struct fib_rules_ops *ops)
{
	if (ops->flush_cache)
		ops->flush_cache();
}

int fib_rules_register(struct fib_rules_ops *ops)
{
	int err = -EEXIST;
	struct fib_rules_ops *o;

	if (ops->rule_size < sizeof(struct fib_rule))
		return -EINVAL;

	if (ops->match == NULL || ops->configure == NULL ||
	    ops->compare == NULL || ops->fill == NULL ||
	    ops->action == NULL)
		return -EINVAL;

	spin_lock(&rules_mod_lock);
	list_for_each_entry(o, &rules_ops, list)
		if (ops->family == o->family)
			goto errout;

	list_add_tail_rcu(&ops->list, &rules_ops);
	err = 0;
errout:
	spin_unlock(&rules_mod_lock);

	return err;
}

EXPORT_SYMBOL_GPL(fib_rules_register);
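
/*
 * Usage sketch (illustrative only, not part of this file): an address
 * family plugs into this layer by filling in a struct fib_rules_ops
 * and registering it, usually followed by one or more default rules.
 * The AF_FOO names below are hypothetical; the IPv4 code follows the
 * same pattern with fib4_rules_ops.
 *
 *	static struct fib_rules_ops foo_rules_ops = {
 *		.family		= AF_FOO,
 *		.rule_size	= sizeof(struct foo_rule),
 *		.addr_size	= sizeof(u32),
 *		.match		= foo_rule_match,
 *		.configure	= foo_rule_configure,
 *		.compare	= foo_rule_compare,
 *		.fill		= foo_rule_fill,
 *		.action		= foo_rule_action,
 *		.policy		= foo_rule_policy,
 *		.rules_list	= LIST_HEAD_INIT(foo_rules_ops.rules_list),
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	err = fib_rules_register(&foo_rules_ops);
 *	if (!err)
 *		err = fib_default_rule_add(&foo_rules_ops, 0x7FFE,
 *					   FOO_TABLE_MAIN, 0);
 *
 * fib_rules_register() rejects an ops structure whose rule_size is
 * smaller than struct fib_rule or which lacks any of the match,
 * configure, compare, fill and action callbacks, so a minimal
 * implementation must provide at least those five.
 */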

static void cleanup_ops(struct fib_rules_ops *ops)
{
	struct fib_rule *rule, *tmp;

	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
		list_del_rcu(&rule->list);
		fib_rule_put(rule);
	}
}

int fib_rules_unregister(struct fib_rules_ops *ops)
{
	int err = 0;
	struct fib_rules_ops *o;

	spin_lock(&rules_mod_lock);
	list_for_each_entry(o, &rules_ops, list) {
		if (o == ops) {
			list_del_rcu(&o->list);
			cleanup_ops(ops);
			goto out;
		}
	}

	err = -ENOENT;
out:
	spin_unlock(&rules_mod_lock);

	synchronize_rcu();

	return err;
}

EXPORT_SYMBOL_GPL(fib_rules_unregister);

static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
			  struct flowi *fl, int flags)
{
	int ret = 0;

	if (rule->ifindex && (rule->ifindex != fl->iif))
		goto out;

	if ((rule->mark ^ fl->mark) & rule->mark_mask)
		goto out;

	ret = ops->match(rule, fl, flags);
out:
	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}

int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
		     int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();

	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
		if (!fib_rule_match(rule, ops, fl, flags))
			continue;

		if (rule->action == FR_ACT_GOTO) {
			struct fib_rule *target;

			target = rcu_dereference(rule->ctarget);
			if (target == NULL) {
				continue;
			} else {
				rule = target;
				goto jumped;
			}
		} else if (rule->action == FR_ACT_NOP)
			continue;
		else
			err = ops->action(rule, fl, flags, arg);

		if (err != -EAGAIN) {
			fib_rule_get(rule);
			arg->rule = rule;
			goto out;
		}
	}

	err = -ESRCH;
out:
	rcu_read_unlock();

	return err;
}

EXPORT_SYMBOL_GPL(fib_rules_lookup);
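
/*
 * Caller sketch (illustrative only): a protocol's route lookup wraps
 * fib_rules_lookup() roughly as below. The foo_* names are
 * hypothetical; IPv4's fib_lookup() follows the same pattern. On
 * success the matched rule is stored in arg.rule with its refcount
 * elevated, so the caller must eventually drop it via fib_rule_put().
 *
 *	static int foo_lookup(struct flowi *fl, struct foo_result *res)
 *	{
 *		struct fib_lookup_arg arg = { .result = res };
 *		int err;
 *
 *		err = fib_rules_lookup(&foo_rules_ops, fl, 0, &arg);
 *		if (err == 0)
 *			fib_rule_put(arg.rule);
 *		return err;
 *	}
 *
 * -ESRCH indicates the rule list was exhausted without a match, while
 * -EAGAIN returned by ops->action merely advances the walk to the
 * next rule.
 */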

static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
			    struct fib_rules_ops *ops)
{
	int err = -EINVAL;

	if (frh->src_len)
		if (tb[FRA_SRC] == NULL ||
		    frh->src_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_SRC]) != ops->addr_size)
			goto errout;

	if (frh->dst_len)
		if (tb[FRA_DST] == NULL ||
		    frh->dst_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_DST]) != ops->addr_size)
			goto errout;

	err = 0;
errout:
	return err;
}

static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = skb->sk->sk_net;
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule, *r, *last = NULL;
	struct nlattr *tb[FRA_MAX+1];
	int err = -EINVAL, unresolved = 0;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
		goto errout;

	ops = lookup_rules_ops(frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
	if (err < 0)
		goto errout;

	err = validate_rulemsg(frh, tb, ops);
	if (err < 0)
		goto errout;

	rule = kzalloc(ops->rule_size, GFP_KERNEL);
	if (rule == NULL) {
		err = -ENOMEM;
		goto errout;
	}

	if (tb[FRA_PRIORITY])
		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);

	if (tb[FRA_IFNAME]) {
		struct net_device *dev;

		rule->ifindex = -1;
		nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, rule->ifname);
		if (dev)
			rule->ifindex = dev->ifindex;
	}

	if (tb[FRA_FWMARK]) {
		rule->mark = nla_get_u32(tb[FRA_FWMARK]);
		if (rule->mark)
			/* compatibility: if the mark value is non-zero all bits
			 * are compared unless a mask is explicitly specified.
			 */
			rule->mark_mask = 0xFFFFFFFF;
	}

	if (tb[FRA_FWMASK])
		rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);

	rule->action = frh->action;
	rule->flags = frh->flags;
	rule->table = frh_get_table(frh, tb);

	if (!rule->pref && ops->default_pref)
		rule->pref = ops->default_pref();

	err = -EINVAL;
	if (tb[FRA_GOTO]) {
		if (rule->action != FR_ACT_GOTO)
			goto errout_free;

		rule->target = nla_get_u32(tb[FRA_GOTO]);
		/* Backward jumps are prohibited to avoid endless loops */
		if (rule->target <= rule->pref)
			goto errout_free;

		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->pref == rule->target) {
				rule->ctarget = r;
				break;
			}
		}

		if (rule->ctarget == NULL)
			unresolved = 1;
	} else if (rule->action == FR_ACT_GOTO)
		goto errout_free;

	err = ops->configure(rule, skb, nlh, frh, tb);
	if (err < 0)
		goto errout_free;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->pref > rule->pref)
			break;
		last = r;
	}

	fib_rule_get(rule);

	if (ops->unresolved_rules) {
		/*
		 * There are unresolved goto rules in the list, check if
		 * any of them are pointing to this new rule.
		 */
		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->action == FR_ACT_GOTO &&
			    r->target == rule->pref) {
				BUG_ON(r->ctarget != NULL);
				rcu_assign_pointer(r->ctarget, rule);
				if (--ops->unresolved_rules == 0)
					break;
			}
		}
	}

	if (rule->action == FR_ACT_GOTO)
		ops->nr_goto_rules++;

	if (unresolved)
		ops->unresolved_rules++;

	if (last)
		list_add_rcu(&rule->list, &last->list);
	else
		list_add_rcu(&rule->list, &ops->rules_list);

	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
	flush_route_cache(ops);
	rules_ops_put(ops);
	return 0;

errout_free:
	kfree(rule);
errout:
	rules_ops_put(ops);
	return err;
}
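
/*
 * Example (illustrative): goto rules may only jump forward to a higher
 * preference value because rule->target <= rule->pref is rejected
 * above. With iproute2:
 *
 *	ip rule add priority 100 fwmark 1 goto 200	# accepted
 *	ip rule add priority 300 goto 200		# rejected, EINVAL
 *
 * A goto whose target preference does not exist yet is accepted but
 * counted in ops->unresolved_rules; fib_rules_lookup() skips it until
 * a rule with the matching preference is added and ctarget resolves.
 */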

static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule, *tmp;
	struct nlattr *tb[FRA_MAX+1];
	int err = -EINVAL;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
		goto errout;

	ops = lookup_rules_ops(frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
	if (err < 0)
		goto errout;

	err = validate_rulemsg(frh, tb, ops);
	if (err < 0)
		goto errout;

	list_for_each_entry(rule, &ops->rules_list, list) {
		if (frh->action && (frh->action != rule->action))
			continue;

		if (frh->table && (frh_get_table(frh, tb) != rule->table))
			continue;

		if (tb[FRA_PRIORITY] &&
		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
			continue;

		if (tb[FRA_IFNAME] &&
		    nla_strcmp(tb[FRA_IFNAME], rule->ifname))
			continue;

		if (tb[FRA_FWMARK] &&
		    (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
			continue;

		if (tb[FRA_FWMASK] &&
		    (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
			continue;

		if (!ops->compare(rule, frh, tb))
			continue;

		if (rule->flags & FIB_RULE_PERMANENT) {
			err = -EPERM;
			goto errout;
		}

		list_del_rcu(&rule->list);

		if (rule->action == FR_ACT_GOTO)
			ops->nr_goto_rules--;

		/*
		 * Check if this rule is the target of any goto rules.
		 * If so, disable them. As this operation is potentially
		 * very expensive, it is only performed if goto rules
		 * have actually been added.
		 */
		if (ops->nr_goto_rules > 0) {
			list_for_each_entry(tmp, &ops->rules_list, list) {
				if (tmp->ctarget == rule) {
					rcu_assign_pointer(tmp->ctarget, NULL);
					ops->unresolved_rules++;
				}
			}
		}

		synchronize_rcu();
		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
				   NETLINK_CB(skb).pid);
		fib_rule_put(rule);
		flush_route_cache(ops);
		rules_ops_put(ops);
		return 0;
	}

	err = -ENOENT;
errout:
	rules_ops_put(ops);
	return err;
}

static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
					 struct fib_rule *rule)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
			 + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */
			 + nla_total_size(4) /* FRA_PRIORITY */
			 + nla_total_size(4) /* FRA_TABLE */
			 + nla_total_size(4) /* FRA_FWMARK */
			 + nla_total_size(4) /* FRA_FWMASK */
			 + nla_total_size(4); /* FRA_GOTO */

	if (ops->nlmsg_payload)
		payload += ops->nlmsg_payload(rule);

	return payload;
}
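
/*
 * Worked example (informational): nla_total_size(len) is the attribute
 * header plus the padded payload, i.e. NLA_ALIGN(NLA_HDRLEN + len), so
 * each u32 attribute above costs NLA_ALIGN(4 + 4) = 8 bytes on top of
 * the NLMSG_ALIGN'ed fib_rule_hdr and the IFNAMSIZ string attribute.
 * Keeping this estimate in sync with fib_nl_fill_rule() matters:
 * notify_rule_change() treats -EMSGSIZE from the fill routine as a
 * bug in this function.
 */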

static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
			    u32 pid, u32 seq, int type, int flags,
			    struct fib_rules_ops *ops)
{
	struct nlmsghdr *nlh;
	struct fib_rule_hdr *frh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	frh = nlmsg_data(nlh);
	frh->table = rule->table;
	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
	frh->res1 = 0;
	frh->res2 = 0;
	frh->action = rule->action;
	frh->flags = rule->flags;

	if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
		frh->flags |= FIB_RULE_UNRESOLVED;

	if (rule->ifname[0]) {
		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);

		if (rule->ifindex == -1)
			frh->flags |= FIB_RULE_DEV_DETACHED;
	}

	if (rule->pref)
		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);

	if (rule->mark)
		NLA_PUT_U32(skb, FRA_FWMARK, rule->mark);

	if (rule->mark_mask || rule->mark)
		NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);

	if (rule->target)
		NLA_PUT_U32(skb, FRA_GOTO, rule->target);

	if (ops->fill(rule, skb, nlh, frh) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
		      struct fib_rules_ops *ops)
{
	int idx = 0;
	struct fib_rule *rule;

	list_for_each_entry(rule, &ops->rules_list, list) {
		if (idx < cb->args[1])
			goto skip;

		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
				     NLM_F_MULTI, ops) < 0)
			break;
skip:
		idx++;
	}
	cb->args[1] = idx;
	rules_ops_put(ops);

	return skb->len;
}

static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct fib_rules_ops *ops;
	int idx = 0, family;

	family = rtnl_msg_family(cb->nlh);
	if (family != AF_UNSPEC) {
		/* Protocol specific dump request */
		ops = lookup_rules_ops(family);
		if (ops == NULL)
			return -EAFNOSUPPORT;

		return dump_rules(skb, cb, ops);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &rules_ops, list) {
		if (idx < cb->args[0] || !try_module_get(ops->owner))
			goto skip;

		if (dump_rules(skb, cb, ops) < 0)
			break;

		cb->args[1] = 0;
skip:
		idx++;
	}
	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
errout:
	if (err < 0)
		rtnl_set_sk_err(ops->nlgroup, err);
}

static void attach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list) {
		if (rule->ifindex == -1 &&
		    strcmp(dev->name, rule->ifname) == 0)
			rule->ifindex = dev->ifindex;
	}
}

static void detach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list)
		if (rule->ifindex == dev->ifindex)
			rule->ifindex = -1;
}

static int fib_rules_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = ptr;
	struct fib_rules_ops *ops;

	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	ASSERT_RTNL();
	rcu_read_lock();

	switch (event) {
	case NETDEV_REGISTER:
		list_for_each_entry(ops, &rules_ops, list)
			attach_rules(&ops->rules_list, dev);
		break;

	case NETDEV_UNREGISTER:
		list_for_each_entry(ops, &rules_ops, list)
			detach_rules(&ops->rules_list, dev);
		break;
	}

	rcu_read_unlock();

	return NOTIFY_DONE;
}

static struct notifier_block fib_rules_notifier = {
	.notifier_call = fib_rules_event,
};

static int __init fib_rules_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);

	return register_netdevice_notifier(&fib_rules_notifier);
}

subsys_initcall(fib_rules_init);
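
/*
 * Example (illustrative): the notifier above keeps interface-bound
 * rules alive across device removal. A rule such as
 *
 *	ip rule add iif eth2 table 100
 *
 * keeps its ifname but has its ifindex reset to -1 when eth2 is
 * unregistered; dumps then report it with FIB_RULE_DEV_DETACHED set
 * (see fib_nl_fill_rule()). If a device named eth2 is registered
 * again later, attach_rules() rebinds the rule to the new ifindex
 * and it takes effect once more.
 */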