1 /* 2 * net/sched/cls_api.c Packet classifier API. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * 11 * Changes: 12 * 13 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support 14 * 15 */ 16 17 #include <linux/module.h> 18 #include <linux/types.h> 19 #include <linux/kernel.h> 20 #include <linux/string.h> 21 #include <linux/errno.h> 22 #include <linux/skbuff.h> 23 #include <linux/init.h> 24 #include <linux/kmod.h> 25 #include <linux/netlink.h> 26 #include <linux/err.h> 27 #include <linux/slab.h> 28 #include <net/net_namespace.h> 29 #include <net/sock.h> 30 #include <net/netlink.h> 31 #include <net/pkt_sched.h> 32 #include <net/pkt_cls.h> 33 34 /* The list of all installed classifier types */ 35 36 static struct tcf_proto_ops *tcf_proto_base __read_mostly; 37 38 /* Protects list of registered TC modules. It is pure SMP lock. */ 39 static DEFINE_RWLOCK(cls_mod_lock); 40 41 /* Find classifier type by string name */ 42 43 static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind) 44 { 45 const struct tcf_proto_ops *t = NULL; 46 47 if (kind) { 48 read_lock(&cls_mod_lock); 49 for (t = tcf_proto_base; t; t = t->next) { 50 if (nla_strcmp(kind, t->kind) == 0) { 51 if (!try_module_get(t->owner)) 52 t = NULL; 53 break; 54 } 55 } 56 read_unlock(&cls_mod_lock); 57 } 58 return t; 59 } 60 61 /* Register(unregister) new classifier type */ 62 63 int register_tcf_proto_ops(struct tcf_proto_ops *ops) 64 { 65 struct tcf_proto_ops *t, **tp; 66 int rc = -EEXIST; 67 68 write_lock(&cls_mod_lock); 69 for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next) 70 if (!strcmp(ops->kind, t->kind)) 71 goto out; 72 73 ops->next = NULL; 74 *tp = ops; 75 rc = 0; 76 out: 77 write_unlock(&cls_mod_lock); 78 return rc; 79 } 80 EXPORT_SYMBOL(register_tcf_proto_ops); 81 82 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) 83 { 84 struct tcf_proto_ops *t, **tp; 85 int rc = -ENOENT; 86 87 write_lock(&cls_mod_lock); 88 for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next) 89 if (t == ops) 90 break; 91 92 if (!t) 93 goto out; 94 *tp = t->next; 95 rc = 0; 96 out: 97 write_unlock(&cls_mod_lock); 98 return rc; 99 } 100 EXPORT_SYMBOL(unregister_tcf_proto_ops); 101 102 static int tfilter_notify(struct net *net, struct sk_buff *oskb, 103 struct nlmsghdr *n, struct tcf_proto *tp, 104 unsigned long fh, int event); 105 106 107 /* Select new prio value from the range, managed by kernel. */ 108 109 static inline u32 tcf_auto_prio(struct tcf_proto *tp) 110 { 111 u32 first = TC_H_MAKE(0xC0000000U, 0U); 112 113 if (tp) 114 first = tp->prio - 1; 115 116 return first; 117 } 118 119 /* Add/change/delete/get a filter node */ 120 121 static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) 122 { 123 struct net *net = sock_net(skb->sk); 124 struct nlattr *tca[TCA_MAX + 1]; 125 spinlock_t *root_lock; 126 struct tcmsg *t; 127 u32 protocol; 128 u32 prio; 129 u32 nprio; 130 u32 parent; 131 struct net_device *dev; 132 struct Qdisc *q; 133 struct tcf_proto **back, **chain; 134 struct tcf_proto *tp; 135 const struct tcf_proto_ops *tp_ops; 136 const struct Qdisc_class_ops *cops; 137 unsigned long cl; 138 unsigned long fh; 139 int err; 140 int tp_created = 0; 141 142 if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN)) 143 return -EPERM; 144 145 replay: 146 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); 147 if (err < 0) 148 return err; 149 150 t = nlmsg_data(n); 151 protocol = TC_H_MIN(t->tcm_info); 152 prio = TC_H_MAJ(t->tcm_info); 153 nprio = prio; 154 parent = t->tcm_parent; 155 cl = 0; 156 157 if (prio == 0) { 158 /* If no priority is given, user wants we allocated it. */ 159 if (n->nlmsg_type != RTM_NEWTFILTER || 160 !(n->nlmsg_flags & NLM_F_CREATE)) 161 return -ENOENT; 162 prio = TC_H_MAKE(0x80000000U, 0U); 163 } 164 165 /* Find head of filter chain. */ 166 167 /* Find link */ 168 dev = __dev_get_by_index(net, t->tcm_ifindex); 169 if (dev == NULL) 170 return -ENODEV; 171 172 /* Find qdisc */ 173 if (!parent) { 174 q = dev->qdisc; 175 parent = q->handle; 176 } else { 177 q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); 178 if (q == NULL) 179 return -EINVAL; 180 } 181 182 /* Is it classful? */ 183 cops = q->ops->cl_ops; 184 if (!cops) 185 return -EINVAL; 186 187 if (cops->tcf_chain == NULL) 188 return -EOPNOTSUPP; 189 190 /* Do we search for filter, attached to class? */ 191 if (TC_H_MIN(parent)) { 192 cl = cops->get(q, parent); 193 if (cl == 0) 194 return -ENOENT; 195 } 196 197 /* And the last stroke */ 198 chain = cops->tcf_chain(q, cl); 199 err = -EINVAL; 200 if (chain == NULL) 201 goto errout; 202 203 /* Check the chain for existence of proto-tcf with this priority */ 204 for (back = chain; (tp = *back) != NULL; back = &tp->next) { 205 if (tp->prio >= prio) { 206 if (tp->prio == prio) { 207 if (!nprio || 208 (tp->protocol != protocol && protocol)) 209 goto errout; 210 } else 211 tp = NULL; 212 break; 213 } 214 } 215 216 root_lock = qdisc_root_sleeping_lock(q); 217 218 if (tp == NULL) { 219 /* Proto-tcf does not exist, create new one */ 220 221 if (tca[TCA_KIND] == NULL || !protocol) 222 goto errout; 223 224 err = -ENOENT; 225 if (n->nlmsg_type != RTM_NEWTFILTER || 226 !(n->nlmsg_flags & NLM_F_CREATE)) 227 goto errout; 228 229 230 /* Create new proto tcf */ 231 232 err = -ENOBUFS; 233 tp = kzalloc(sizeof(*tp), GFP_KERNEL); 234 if (tp == NULL) 235 goto errout; 236 err = -ENOENT; 237 tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); 238 if (tp_ops == NULL) { 239 #ifdef CONFIG_MODULES 240 struct nlattr *kind = tca[TCA_KIND]; 241 char name[IFNAMSIZ]; 242 243 if (kind != NULL && 244 nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { 245 rtnl_unlock(); 246 request_module("cls_%s", name); 247 rtnl_lock(); 248 tp_ops = tcf_proto_lookup_ops(kind); 249 /* We dropped the RTNL semaphore in order to 250 * perform the module load. So, even if we 251 * succeeded in loading the module we have to 252 * replay the request. We indicate this using 253 * -EAGAIN. 254 */ 255 if (tp_ops != NULL) { 256 module_put(tp_ops->owner); 257 err = -EAGAIN; 258 } 259 } 260 #endif 261 kfree(tp); 262 goto errout; 263 } 264 tp->ops = tp_ops; 265 tp->protocol = protocol; 266 tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back)); 267 tp->q = q; 268 tp->classify = tp_ops->classify; 269 tp->classid = parent; 270 271 err = tp_ops->init(tp); 272 if (err != 0) { 273 module_put(tp_ops->owner); 274 kfree(tp); 275 goto errout; 276 } 277 278 tp_created = 1; 279 280 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) 281 goto errout; 282 283 fh = tp->ops->get(tp, t->tcm_handle); 284 285 if (fh == 0) { 286 if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { 287 spin_lock_bh(root_lock); 288 *back = tp->next; 289 spin_unlock_bh(root_lock); 290 291 tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); 292 tcf_destroy(tp); 293 err = 0; 294 goto errout; 295 } 296 297 err = -ENOENT; 298 if (n->nlmsg_type != RTM_NEWTFILTER || 299 !(n->nlmsg_flags & NLM_F_CREATE)) 300 goto errout; 301 } else { 302 switch (n->nlmsg_type) { 303 case RTM_NEWTFILTER: 304 err = -EEXIST; 305 if (n->nlmsg_flags & NLM_F_EXCL) { 306 if (tp_created) 307 tcf_destroy(tp); 308 goto errout; 309 } 310 break; 311 case RTM_DELTFILTER: 312 err = tp->ops->delete(tp, fh); 313 if (err == 0) 314 tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); 315 goto errout; 316 case RTM_GETTFILTER: 317 err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); 318 goto errout; 319 default: 320 err = -EINVAL; 321 goto errout; 322 } 323 } 324 325 err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh); 326 if (err == 0) { 327 if (tp_created) { 328 spin_lock_bh(root_lock); 329 tp->next = *back; 330 *back = tp; 331 spin_unlock_bh(root_lock); 332 } 333 tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); 334 } else { 335 if (tp_created) 336 tcf_destroy(tp); 337 } 338 339 errout: 340 if (cl) 341 cops->put(q, cl); 342 if (err == -EAGAIN) 343 /* Replay the request. */ 344 goto replay; 345 return err; 346 } 347 348 static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, 349 unsigned long fh, u32 portid, u32 seq, u16 flags, int event) 350 { 351 struct tcmsg *tcm; 352 struct nlmsghdr *nlh; 353 unsigned char *b = skb_tail_pointer(skb); 354 355 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 356 if (!nlh) 357 goto out_nlmsg_trim; 358 tcm = nlmsg_data(nlh); 359 tcm->tcm_family = AF_UNSPEC; 360 tcm->tcm__pad1 = 0; 361 tcm->tcm__pad2 = 0; 362 tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; 363 tcm->tcm_parent = tp->classid; 364 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); 365 if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) 366 goto nla_put_failure; 367 tcm->tcm_handle = fh; 368 if (RTM_DELTFILTER != event) { 369 tcm->tcm_handle = 0; 370 if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0) 371 goto nla_put_failure; 372 } 373 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 374 return skb->len; 375 376 out_nlmsg_trim: 377 nla_put_failure: 378 nlmsg_trim(skb, b); 379 return -1; 380 } 381 382 static int tfilter_notify(struct net *net, struct sk_buff *oskb, 383 struct nlmsghdr *n, struct tcf_proto *tp, 384 unsigned long fh, int event) 385 { 386 struct sk_buff *skb; 387 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 388 389 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 390 if (!skb) 391 return -ENOBUFS; 392 393 if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { 394 kfree_skb(skb); 395 return -EINVAL; 396 } 397 398 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, 399 n->nlmsg_flags & NLM_F_ECHO); 400 } 401 402 struct tcf_dump_args { 403 struct tcf_walker w; 404 struct sk_buff *skb; 405 struct netlink_callback *cb; 406 }; 407 408 static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, 409 struct tcf_walker *arg) 410 { 411 struct tcf_dump_args *a = (void *)arg; 412 413 return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, 414 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); 415 } 416 417 /* called with RTNL */ 418 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) 419 { 420 struct net *net = sock_net(skb->sk); 421 int t; 422 int s_t; 423 struct net_device *dev; 424 struct Qdisc *q; 425 struct tcf_proto *tp, **chain; 426 struct tcmsg *tcm = nlmsg_data(cb->nlh); 427 unsigned long cl = 0; 428 const struct Qdisc_class_ops *cops; 429 struct tcf_dump_args arg; 430 431 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 432 return skb->len; 433 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 434 if (!dev) 435 return skb->len; 436 437 if (!tcm->tcm_parent) 438 q = dev->qdisc; 439 else 440 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); 441 if (!q) 442 goto out; 443 cops = q->ops->cl_ops; 444 if (!cops) 445 goto errout; 446 if (cops->tcf_chain == NULL) 447 goto errout; 448 if (TC_H_MIN(tcm->tcm_parent)) { 449 cl = cops->get(q, tcm->tcm_parent); 450 if (cl == 0) 451 goto errout; 452 } 453 chain = cops->tcf_chain(q, cl); 454 if (chain == NULL) 455 goto errout; 456 457 s_t = cb->args[0]; 458 459 for (tp = *chain, t = 0; tp; tp = tp->next, t++) { 460 if (t < s_t) 461 continue; 462 if (TC_H_MAJ(tcm->tcm_info) && 463 TC_H_MAJ(tcm->tcm_info) != tp->prio) 464 continue; 465 if (TC_H_MIN(tcm->tcm_info) && 466 TC_H_MIN(tcm->tcm_info) != tp->protocol) 467 continue; 468 if (t > s_t) 469 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); 470 if (cb->args[1] == 0) { 471 if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid, 472 cb->nlh->nlmsg_seq, NLM_F_MULTI, 473 RTM_NEWTFILTER) <= 0) 474 break; 475 476 cb->args[1] = 1; 477 } 478 if (tp->ops->walk == NULL) 479 continue; 480 arg.w.fn = tcf_node_dump; 481 arg.skb = skb; 482 arg.cb = cb; 483 arg.w.stop = 0; 484 arg.w.skip = cb->args[1] - 1; 485 arg.w.count = 0; 486 tp->ops->walk(tp, &arg.w); 487 cb->args[1] = arg.w.count + 1; 488 if (arg.w.stop) 489 break; 490 } 491 492 cb->args[0] = t; 493 494 errout: 495 if (cl) 496 cops->put(q, cl); 497 out: 498 return skb->len; 499 } 500 501 void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) 502 { 503 #ifdef CONFIG_NET_CLS_ACT 504 if (exts->action) { 505 tcf_action_destroy(exts->action, TCA_ACT_UNBIND); 506 exts->action = NULL; 507 } 508 #endif 509 } 510 EXPORT_SYMBOL(tcf_exts_destroy); 511 512 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, 513 struct nlattr *rate_tlv, struct tcf_exts *exts, 514 const struct tcf_ext_map *map) 515 { 516 memset(exts, 0, sizeof(*exts)); 517 518 #ifdef CONFIG_NET_CLS_ACT 519 { 520 struct tc_action *act; 521 522 if (map->police && tb[map->police]) { 523 act = tcf_action_init_1(net, tb[map->police], rate_tlv, 524 "police", TCA_ACT_NOREPLACE, 525 TCA_ACT_BIND); 526 if (IS_ERR(act)) 527 return PTR_ERR(act); 528 529 act->type = TCA_OLD_COMPAT; 530 exts->action = act; 531 } else if (map->action && tb[map->action]) { 532 act = tcf_action_init(net, tb[map->action], rate_tlv, 533 NULL, TCA_ACT_NOREPLACE, 534 TCA_ACT_BIND); 535 if (IS_ERR(act)) 536 return PTR_ERR(act); 537 538 exts->action = act; 539 } 540 } 541 #else 542 if ((map->action && tb[map->action]) || 543 (map->police && tb[map->police])) 544 return -EOPNOTSUPP; 545 #endif 546 547 return 0; 548 } 549 EXPORT_SYMBOL(tcf_exts_validate); 550 551 void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, 552 struct tcf_exts *src) 553 { 554 #ifdef CONFIG_NET_CLS_ACT 555 if (src->action) { 556 struct tc_action *act; 557 tcf_tree_lock(tp); 558 act = dst->action; 559 dst->action = src->action; 560 tcf_tree_unlock(tp); 561 if (act) 562 tcf_action_destroy(act, TCA_ACT_UNBIND); 563 } 564 #endif 565 } 566 EXPORT_SYMBOL(tcf_exts_change); 567 568 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, 569 const struct tcf_ext_map *map) 570 { 571 #ifdef CONFIG_NET_CLS_ACT 572 if (map->action && exts->action) { 573 /* 574 * again for backward compatible mode - we want 575 * to work with both old and new modes of entering 576 * tc data even if iproute2 was newer - jhs 577 */ 578 struct nlattr *nest; 579 580 if (exts->action->type != TCA_OLD_COMPAT) { 581 nest = nla_nest_start(skb, map->action); 582 if (nest == NULL) 583 goto nla_put_failure; 584 if (tcf_action_dump(skb, exts->action, 0, 0) < 0) 585 goto nla_put_failure; 586 nla_nest_end(skb, nest); 587 } else if (map->police) { 588 nest = nla_nest_start(skb, map->police); 589 if (nest == NULL) 590 goto nla_put_failure; 591 if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0) 592 goto nla_put_failure; 593 nla_nest_end(skb, nest); 594 } 595 } 596 #endif 597 return 0; 598 nla_put_failure: __attribute__ ((unused)) 599 return -1; 600 } 601 EXPORT_SYMBOL(tcf_exts_dump); 602 603 604 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, 605 const struct tcf_ext_map *map) 606 { 607 #ifdef CONFIG_NET_CLS_ACT 608 if (exts->action) 609 if (tcf_action_copy_stats(skb, exts->action, 1) < 0) 610 goto nla_put_failure; 611 #endif 612 return 0; 613 nla_put_failure: __attribute__ ((unused)) 614 return -1; 615 } 616 EXPORT_SYMBOL(tcf_exts_dump_stats); 617 618 static int __init tc_filter_init(void) 619 { 620 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL); 621 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL); 622 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, 623 tc_dump_tfilter, NULL); 624 625 return 0; 626 } 627 628 subsys_initcall(tc_filter_init); 629