1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * net/sched/cls_api.c Packet classifier API. 4 * 5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 6 * 7 * Changes: 8 * 9 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support 10 */ 11 12 #include <linux/module.h> 13 #include <linux/types.h> 14 #include <linux/kernel.h> 15 #include <linux/string.h> 16 #include <linux/errno.h> 17 #include <linux/err.h> 18 #include <linux/skbuff.h> 19 #include <linux/init.h> 20 #include <linux/kmod.h> 21 #include <linux/slab.h> 22 #include <linux/idr.h> 23 #include <linux/rhashtable.h> 24 #include <net/net_namespace.h> 25 #include <net/sock.h> 26 #include <net/netlink.h> 27 #include <net/pkt_sched.h> 28 #include <net/pkt_cls.h> 29 #include <net/tc_act/tc_pedit.h> 30 #include <net/tc_act/tc_mirred.h> 31 #include <net/tc_act/tc_vlan.h> 32 #include <net/tc_act/tc_tunnel_key.h> 33 #include <net/tc_act/tc_csum.h> 34 #include <net/tc_act/tc_gact.h> 35 #include <net/tc_act/tc_police.h> 36 #include <net/tc_act/tc_sample.h> 37 #include <net/tc_act/tc_skbedit.h> 38 #include <net/tc_act/tc_ct.h> 39 #include <net/tc_act/tc_mpls.h> 40 #include <net/flow_offload.h> 41 42 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; 43 44 /* The list of all installed classifier types */ 45 static LIST_HEAD(tcf_proto_base); 46 47 /* Protects list of registered TC modules. It is pure SMP lock. */ 48 static DEFINE_RWLOCK(cls_mod_lock); 49 50 /* Find classifier type by string name */ 51 52 static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) 53 { 54 const struct tcf_proto_ops *t, *res = NULL; 55 56 if (kind) { 57 read_lock(&cls_mod_lock); 58 list_for_each_entry(t, &tcf_proto_base, head) { 59 if (strcmp(kind, t->kind) == 0) { 60 if (try_module_get(t->owner)) 61 res = t; 62 break; 63 } 64 } 65 read_unlock(&cls_mod_lock); 66 } 67 return res; 68 } 69 70 static const struct tcf_proto_ops * 71 tcf_proto_lookup_ops(const char *kind, bool rtnl_held, 72 struct netlink_ext_ack *extack) 73 { 74 const struct tcf_proto_ops *ops; 75 76 ops = __tcf_proto_lookup_ops(kind); 77 if (ops) 78 return ops; 79 #ifdef CONFIG_MODULES 80 if (rtnl_held) 81 rtnl_unlock(); 82 request_module("cls_%s", kind); 83 if (rtnl_held) 84 rtnl_lock(); 85 ops = __tcf_proto_lookup_ops(kind); 86 /* We dropped the RTNL semaphore in order to perform 87 * the module load. So, even if we succeeded in loading 88 * the module we have to replay the request. We indicate 89 * this using -EAGAIN. 90 */ 91 if (ops) { 92 module_put(ops->owner); 93 return ERR_PTR(-EAGAIN); 94 } 95 #endif 96 NL_SET_ERR_MSG(extack, "TC classifier not found"); 97 return ERR_PTR(-ENOENT); 98 } 99 100 /* Register(unregister) new classifier type */ 101 102 int register_tcf_proto_ops(struct tcf_proto_ops *ops) 103 { 104 struct tcf_proto_ops *t; 105 int rc = -EEXIST; 106 107 write_lock(&cls_mod_lock); 108 list_for_each_entry(t, &tcf_proto_base, head) 109 if (!strcmp(ops->kind, t->kind)) 110 goto out; 111 112 list_add_tail(&ops->head, &tcf_proto_base); 113 rc = 0; 114 out: 115 write_unlock(&cls_mod_lock); 116 return rc; 117 } 118 EXPORT_SYMBOL(register_tcf_proto_ops); 119 120 static struct workqueue_struct *tc_filter_wq; 121 122 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) 123 { 124 struct tcf_proto_ops *t; 125 int rc = -ENOENT; 126 127 /* Wait for outstanding call_rcu()s, if any, from a 128 * tcf_proto_ops's destroy() handler. 
129 */ 130 rcu_barrier(); 131 flush_workqueue(tc_filter_wq); 132 133 write_lock(&cls_mod_lock); 134 list_for_each_entry(t, &tcf_proto_base, head) { 135 if (t == ops) { 136 list_del(&t->head); 137 rc = 0; 138 break; 139 } 140 } 141 write_unlock(&cls_mod_lock); 142 return rc; 143 } 144 EXPORT_SYMBOL(unregister_tcf_proto_ops); 145 146 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func) 147 { 148 INIT_RCU_WORK(rwork, func); 149 return queue_rcu_work(tc_filter_wq, rwork); 150 } 151 EXPORT_SYMBOL(tcf_queue_work); 152 153 /* Select new prio value from the range, managed by kernel. */ 154 155 static inline u32 tcf_auto_prio(struct tcf_proto *tp) 156 { 157 u32 first = TC_H_MAKE(0xC0000000U, 0U); 158 159 if (tp) 160 first = tp->prio - 1; 161 162 return TC_H_MAJ(first); 163 } 164 165 static bool tcf_proto_check_kind(struct nlattr *kind, char *name) 166 { 167 if (kind) 168 return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ; 169 memset(name, 0, IFNAMSIZ); 170 return false; 171 } 172 173 static bool tcf_proto_is_unlocked(const char *kind) 174 { 175 const struct tcf_proto_ops *ops; 176 bool ret; 177 178 if (strlen(kind) == 0) 179 return false; 180 181 ops = tcf_proto_lookup_ops(kind, false, NULL); 182 /* On error return false to take rtnl lock. Proto lookup/create 183 * functions will perform lookup again and properly handle errors. 184 */ 185 if (IS_ERR(ops)) 186 return false; 187 188 ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED); 189 module_put(ops->owner); 190 return ret; 191 } 192 193 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, 194 u32 prio, struct tcf_chain *chain, 195 bool rtnl_held, 196 struct netlink_ext_ack *extack) 197 { 198 struct tcf_proto *tp; 199 int err; 200 201 tp = kzalloc(sizeof(*tp), GFP_KERNEL); 202 if (!tp) 203 return ERR_PTR(-ENOBUFS); 204 205 tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack); 206 if (IS_ERR(tp->ops)) { 207 err = PTR_ERR(tp->ops); 208 goto errout; 209 } 210 tp->classify = tp->ops->classify; 211 tp->protocol = protocol; 212 tp->prio = prio; 213 tp->chain = chain; 214 spin_lock_init(&tp->lock); 215 refcount_set(&tp->refcnt, 1); 216 217 err = tp->ops->init(tp); 218 if (err) { 219 module_put(tp->ops->owner); 220 goto errout; 221 } 222 return tp; 223 224 errout: 225 kfree(tp); 226 return ERR_PTR(err); 227 } 228 229 static void tcf_proto_get(struct tcf_proto *tp) 230 { 231 refcount_inc(&tp->refcnt); 232 } 233 234 static void tcf_chain_put(struct tcf_chain *chain); 235 236 static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, 237 struct netlink_ext_ack *extack) 238 { 239 tp->ops->destroy(tp, rtnl_held, extack); 240 tcf_chain_put(tp->chain); 241 module_put(tp->ops->owner); 242 kfree_rcu(tp, rcu); 243 } 244 245 static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, 246 struct netlink_ext_ack *extack) 247 { 248 if (refcount_dec_and_test(&tp->refcnt)) 249 tcf_proto_destroy(tp, rtnl_held, extack); 250 } 251 252 static int walker_check_empty(struct tcf_proto *tp, void *fh, 253 struct tcf_walker *arg) 254 { 255 if (fh) { 256 arg->nonempty = true; 257 return -1; 258 } 259 return 0; 260 } 261 262 static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held) 263 { 264 struct tcf_walker walker = { .fn = walker_check_empty, }; 265 266 if (tp->ops->walk) { 267 tp->ops->walk(tp, &walker, rtnl_held); 268 return !walker.nonempty; 269 } 270 return true; 271 } 272 273 static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held) 274 { 275 spin_lock(&tp->lock); 276 if (tcf_proto_is_empty(tp, 
rtnl_held)) 277 tp->deleting = true; 278 spin_unlock(&tp->lock); 279 return tp->deleting; 280 } 281 282 static void tcf_proto_mark_delete(struct tcf_proto *tp) 283 { 284 spin_lock(&tp->lock); 285 tp->deleting = true; 286 spin_unlock(&tp->lock); 287 } 288 289 static bool tcf_proto_is_deleting(struct tcf_proto *tp) 290 { 291 bool deleting; 292 293 spin_lock(&tp->lock); 294 deleting = tp->deleting; 295 spin_unlock(&tp->lock); 296 297 return deleting; 298 } 299 300 #define ASSERT_BLOCK_LOCKED(block) \ 301 lockdep_assert_held(&(block)->lock) 302 303 struct tcf_filter_chain_list_item { 304 struct list_head list; 305 tcf_chain_head_change_t *chain_head_change; 306 void *chain_head_change_priv; 307 }; 308 309 static struct tcf_chain *tcf_chain_create(struct tcf_block *block, 310 u32 chain_index) 311 { 312 struct tcf_chain *chain; 313 314 ASSERT_BLOCK_LOCKED(block); 315 316 chain = kzalloc(sizeof(*chain), GFP_KERNEL); 317 if (!chain) 318 return NULL; 319 list_add_tail(&chain->list, &block->chain_list); 320 mutex_init(&chain->filter_chain_lock); 321 chain->block = block; 322 chain->index = chain_index; 323 chain->refcnt = 1; 324 if (!chain->index) 325 block->chain0.chain = chain; 326 return chain; 327 } 328 329 static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item, 330 struct tcf_proto *tp_head) 331 { 332 if (item->chain_head_change) 333 item->chain_head_change(tp_head, item->chain_head_change_priv); 334 } 335 336 static void tcf_chain0_head_change(struct tcf_chain *chain, 337 struct tcf_proto *tp_head) 338 { 339 struct tcf_filter_chain_list_item *item; 340 struct tcf_block *block = chain->block; 341 342 if (chain->index) 343 return; 344 345 mutex_lock(&block->lock); 346 list_for_each_entry(item, &block->chain0.filter_chain_list, list) 347 tcf_chain_head_change_item(item, tp_head); 348 mutex_unlock(&block->lock); 349 } 350 351 /* Returns true if block can be safely freed. */ 352 353 static bool tcf_chain_detach(struct tcf_chain *chain) 354 { 355 struct tcf_block *block = chain->block; 356 357 ASSERT_BLOCK_LOCKED(block); 358 359 list_del(&chain->list); 360 if (!chain->index) 361 block->chain0.chain = NULL; 362 363 if (list_empty(&block->chain_list) && 364 refcount_read(&block->refcnt) == 0) 365 return true; 366 367 return false; 368 } 369 370 static void tcf_block_destroy(struct tcf_block *block) 371 { 372 mutex_destroy(&block->lock); 373 kfree_rcu(block, rcu); 374 } 375 376 static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) 377 { 378 struct tcf_block *block = chain->block; 379 380 mutex_destroy(&chain->filter_chain_lock); 381 kfree_rcu(chain, rcu); 382 if (free_block) 383 tcf_block_destroy(block); 384 } 385 386 static void tcf_chain_hold(struct tcf_chain *chain) 387 { 388 ASSERT_BLOCK_LOCKED(chain->block); 389 390 ++chain->refcnt; 391 } 392 393 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain) 394 { 395 ASSERT_BLOCK_LOCKED(chain->block); 396 397 /* In case all the references are action references, this 398 * chain should not be shown to the user. 
399 */ 400 return chain->refcnt == chain->action_refcnt; 401 } 402 403 static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, 404 u32 chain_index) 405 { 406 struct tcf_chain *chain; 407 408 ASSERT_BLOCK_LOCKED(block); 409 410 list_for_each_entry(chain, &block->chain_list, list) { 411 if (chain->index == chain_index) 412 return chain; 413 } 414 return NULL; 415 } 416 417 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, 418 u32 seq, u16 flags, int event, bool unicast); 419 420 static struct tcf_chain *__tcf_chain_get(struct tcf_block *block, 421 u32 chain_index, bool create, 422 bool by_act) 423 { 424 struct tcf_chain *chain = NULL; 425 bool is_first_reference; 426 427 mutex_lock(&block->lock); 428 chain = tcf_chain_lookup(block, chain_index); 429 if (chain) { 430 tcf_chain_hold(chain); 431 } else { 432 if (!create) 433 goto errout; 434 chain = tcf_chain_create(block, chain_index); 435 if (!chain) 436 goto errout; 437 } 438 439 if (by_act) 440 ++chain->action_refcnt; 441 is_first_reference = chain->refcnt - chain->action_refcnt == 1; 442 mutex_unlock(&block->lock); 443 444 /* Send notification only in case we got the first 445 * non-action reference. Until then, the chain acts only as 446 * a placeholder for actions pointing to it and user ought 447 * not know about them. 448 */ 449 if (is_first_reference && !by_act) 450 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, 451 RTM_NEWCHAIN, false); 452 453 return chain; 454 455 errout: 456 mutex_unlock(&block->lock); 457 return chain; 458 } 459 460 static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, 461 bool create) 462 { 463 return __tcf_chain_get(block, chain_index, create, false); 464 } 465 466 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index) 467 { 468 return __tcf_chain_get(block, chain_index, true, true); 469 } 470 EXPORT_SYMBOL(tcf_chain_get_by_act); 471 472 static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, 473 void *tmplt_priv); 474 static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, 475 void *tmplt_priv, u32 chain_index, 476 struct tcf_block *block, struct sk_buff *oskb, 477 u32 seq, u16 flags, bool unicast); 478 479 static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, 480 bool explicitly_created) 481 { 482 struct tcf_block *block = chain->block; 483 const struct tcf_proto_ops *tmplt_ops; 484 bool free_block = false; 485 unsigned int refcnt; 486 void *tmplt_priv; 487 488 mutex_lock(&block->lock); 489 if (explicitly_created) { 490 if (!chain->explicitly_created) { 491 mutex_unlock(&block->lock); 492 return; 493 } 494 chain->explicitly_created = false; 495 } 496 497 if (by_act) 498 chain->action_refcnt--; 499 500 /* tc_chain_notify_delete can't be called while holding block lock. 501 * However, when block is unlocked chain can be changed concurrently, so 502 * save these to temporary variables. 503 */ 504 refcnt = --chain->refcnt; 505 tmplt_ops = chain->tmplt_ops; 506 tmplt_priv = chain->tmplt_priv; 507 508 /* The last dropped non-action reference will trigger notification. */ 509 if (refcnt - chain->action_refcnt == 0 && !by_act) { 510 tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index, 511 block, NULL, 0, 0, false); 512 /* Last reference to chain, no need to lock. 
*/ 513 chain->flushing = false; 514 } 515 516 if (refcnt == 0) 517 free_block = tcf_chain_detach(chain); 518 mutex_unlock(&block->lock); 519 520 if (refcnt == 0) { 521 tc_chain_tmplt_del(tmplt_ops, tmplt_priv); 522 tcf_chain_destroy(chain, free_block); 523 } 524 } 525 526 static void tcf_chain_put(struct tcf_chain *chain) 527 { 528 __tcf_chain_put(chain, false, false); 529 } 530 531 void tcf_chain_put_by_act(struct tcf_chain *chain) 532 { 533 __tcf_chain_put(chain, true, false); 534 } 535 EXPORT_SYMBOL(tcf_chain_put_by_act); 536 537 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) 538 { 539 __tcf_chain_put(chain, false, true); 540 } 541 542 static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) 543 { 544 struct tcf_proto *tp, *tp_next; 545 546 mutex_lock(&chain->filter_chain_lock); 547 tp = tcf_chain_dereference(chain->filter_chain, chain); 548 RCU_INIT_POINTER(chain->filter_chain, NULL); 549 tcf_chain0_head_change(chain, NULL); 550 chain->flushing = true; 551 mutex_unlock(&chain->filter_chain_lock); 552 553 while (tp) { 554 tp_next = rcu_dereference_protected(tp->next, 1); 555 tcf_proto_put(tp, rtnl_held, NULL); 556 tp = tp_next; 557 } 558 } 559 560 static int tcf_block_setup(struct tcf_block *block, 561 struct flow_block_offload *bo); 562 563 static void tc_indr_block_ing_cmd(struct net_device *dev, 564 struct tcf_block *block, 565 flow_indr_block_bind_cb_t *cb, 566 void *cb_priv, 567 enum flow_block_command command) 568 { 569 struct flow_block_offload bo = { 570 .command = command, 571 .binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS, 572 .net = dev_net(dev), 573 .block_shared = tcf_block_non_null_shared(block), 574 }; 575 INIT_LIST_HEAD(&bo.cb_list); 576 577 if (!block) 578 return; 579 580 bo.block = &block->flow_block; 581 582 down_write(&block->cb_lock); 583 cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); 584 585 tcf_block_setup(block, &bo); 586 up_write(&block->cb_lock); 587 } 588 589 static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) 590 { 591 const struct Qdisc_class_ops *cops; 592 struct Qdisc *qdisc; 593 594 if (!dev_ingress_queue(dev)) 595 return NULL; 596 597 qdisc = dev_ingress_queue(dev)->qdisc_sleeping; 598 if (!qdisc) 599 return NULL; 600 601 cops = qdisc->ops->cl_ops; 602 if (!cops) 603 return NULL; 604 605 if (!cops->tcf_block) 606 return NULL; 607 608 return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); 609 } 610 611 static void tc_indr_block_get_and_ing_cmd(struct net_device *dev, 612 flow_indr_block_bind_cb_t *cb, 613 void *cb_priv, 614 enum flow_block_command command) 615 { 616 struct tcf_block *block = tc_dev_ingress_block(dev); 617 618 tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command); 619 } 620 621 static void tc_indr_block_call(struct tcf_block *block, 622 struct net_device *dev, 623 struct tcf_block_ext_info *ei, 624 enum flow_block_command command, 625 struct netlink_ext_ack *extack) 626 { 627 struct flow_block_offload bo = { 628 .command = command, 629 .binder_type = ei->binder_type, 630 .net = dev_net(dev), 631 .block = &block->flow_block, 632 .block_shared = tcf_block_shared(block), 633 .extack = extack, 634 }; 635 INIT_LIST_HEAD(&bo.cb_list); 636 637 flow_indr_block_call(dev, &bo, command); 638 tcf_block_setup(block, &bo); 639 } 640 641 static bool tcf_block_offload_in_use(struct tcf_block *block) 642 { 643 return atomic_read(&block->offloadcnt); 644 } 645 646 static int tcf_block_offload_cmd(struct tcf_block *block, 647 struct net_device *dev, 648 struct tcf_block_ext_info *ei, 649 enum 
flow_block_command command, 650 struct netlink_ext_ack *extack) 651 { 652 struct flow_block_offload bo = {}; 653 int err; 654 655 bo.net = dev_net(dev); 656 bo.command = command; 657 bo.binder_type = ei->binder_type; 658 bo.block = &block->flow_block; 659 bo.block_shared = tcf_block_shared(block); 660 bo.extack = extack; 661 INIT_LIST_HEAD(&bo.cb_list); 662 663 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); 664 if (err < 0) 665 return err; 666 667 return tcf_block_setup(block, &bo); 668 } 669 670 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, 671 struct tcf_block_ext_info *ei, 672 struct netlink_ext_ack *extack) 673 { 674 struct net_device *dev = q->dev_queue->dev; 675 int err; 676 677 down_write(&block->cb_lock); 678 if (!dev->netdev_ops->ndo_setup_tc) 679 goto no_offload_dev_inc; 680 681 /* If tc offload feature is disabled and the block we try to bind 682 * to already has some offloaded filters, forbid to bind. 683 */ 684 if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) { 685 NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); 686 err = -EOPNOTSUPP; 687 goto err_unlock; 688 } 689 690 err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack); 691 if (err == -EOPNOTSUPP) 692 goto no_offload_dev_inc; 693 if (err) 694 goto err_unlock; 695 696 tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); 697 up_write(&block->cb_lock); 698 return 0; 699 700 no_offload_dev_inc: 701 if (tcf_block_offload_in_use(block)) { 702 err = -EOPNOTSUPP; 703 goto err_unlock; 704 } 705 err = 0; 706 block->nooffloaddevcnt++; 707 tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); 708 err_unlock: 709 up_write(&block->cb_lock); 710 return err; 711 } 712 713 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, 714 struct tcf_block_ext_info *ei) 715 { 716 struct net_device *dev = q->dev_queue->dev; 717 int err; 718 719 down_write(&block->cb_lock); 720 tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); 721 722 if (!dev->netdev_ops->ndo_setup_tc) 723 goto no_offload_dev_dec; 724 err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); 725 if (err == -EOPNOTSUPP) 726 goto no_offload_dev_dec; 727 up_write(&block->cb_lock); 728 return; 729 730 no_offload_dev_dec: 731 WARN_ON(block->nooffloaddevcnt-- == 0); 732 up_write(&block->cb_lock); 733 } 734 735 static int 736 tcf_chain0_head_change_cb_add(struct tcf_block *block, 737 struct tcf_block_ext_info *ei, 738 struct netlink_ext_ack *extack) 739 { 740 struct tcf_filter_chain_list_item *item; 741 struct tcf_chain *chain0; 742 743 item = kmalloc(sizeof(*item), GFP_KERNEL); 744 if (!item) { 745 NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed"); 746 return -ENOMEM; 747 } 748 item->chain_head_change = ei->chain_head_change; 749 item->chain_head_change_priv = ei->chain_head_change_priv; 750 751 mutex_lock(&block->lock); 752 chain0 = block->chain0.chain; 753 if (chain0) 754 tcf_chain_hold(chain0); 755 else 756 list_add(&item->list, &block->chain0.filter_chain_list); 757 mutex_unlock(&block->lock); 758 759 if (chain0) { 760 struct tcf_proto *tp_head; 761 762 mutex_lock(&chain0->filter_chain_lock); 763 764 tp_head = tcf_chain_dereference(chain0->filter_chain, chain0); 765 if (tp_head) 766 tcf_chain_head_change_item(item, tp_head); 767 768 mutex_lock(&block->lock); 769 list_add(&item->list, &block->chain0.filter_chain_list); 770 mutex_unlock(&block->lock); 771 772 
mutex_unlock(&chain0->filter_chain_lock); 773 tcf_chain_put(chain0); 774 } 775 776 return 0; 777 } 778 779 static void 780 tcf_chain0_head_change_cb_del(struct tcf_block *block, 781 struct tcf_block_ext_info *ei) 782 { 783 struct tcf_filter_chain_list_item *item; 784 785 mutex_lock(&block->lock); 786 list_for_each_entry(item, &block->chain0.filter_chain_list, list) { 787 if ((!ei->chain_head_change && !ei->chain_head_change_priv) || 788 (item->chain_head_change == ei->chain_head_change && 789 item->chain_head_change_priv == ei->chain_head_change_priv)) { 790 if (block->chain0.chain) 791 tcf_chain_head_change_item(item, NULL); 792 list_del(&item->list); 793 mutex_unlock(&block->lock); 794 795 kfree(item); 796 return; 797 } 798 } 799 mutex_unlock(&block->lock); 800 WARN_ON(1); 801 } 802 803 struct tcf_net { 804 spinlock_t idr_lock; /* Protects idr */ 805 struct idr idr; 806 }; 807 808 static unsigned int tcf_net_id; 809 810 static int tcf_block_insert(struct tcf_block *block, struct net *net, 811 struct netlink_ext_ack *extack) 812 { 813 struct tcf_net *tn = net_generic(net, tcf_net_id); 814 int err; 815 816 idr_preload(GFP_KERNEL); 817 spin_lock(&tn->idr_lock); 818 err = idr_alloc_u32(&tn->idr, block, &block->index, block->index, 819 GFP_NOWAIT); 820 spin_unlock(&tn->idr_lock); 821 idr_preload_end(); 822 823 return err; 824 } 825 826 static void tcf_block_remove(struct tcf_block *block, struct net *net) 827 { 828 struct tcf_net *tn = net_generic(net, tcf_net_id); 829 830 spin_lock(&tn->idr_lock); 831 idr_remove(&tn->idr, block->index); 832 spin_unlock(&tn->idr_lock); 833 } 834 835 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, 836 u32 block_index, 837 struct netlink_ext_ack *extack) 838 { 839 struct tcf_block *block; 840 841 block = kzalloc(sizeof(*block), GFP_KERNEL); 842 if (!block) { 843 NL_SET_ERR_MSG(extack, "Memory allocation for block failed"); 844 return ERR_PTR(-ENOMEM); 845 } 846 mutex_init(&block->lock); 847 init_rwsem(&block->cb_lock); 848 flow_block_init(&block->flow_block); 849 INIT_LIST_HEAD(&block->chain_list); 850 INIT_LIST_HEAD(&block->owner_list); 851 INIT_LIST_HEAD(&block->chain0.filter_chain_list); 852 853 refcount_set(&block->refcnt, 1); 854 block->net = net; 855 block->index = block_index; 856 857 /* Don't store q pointer for blocks which are shared */ 858 if (!tcf_block_shared(block)) 859 block->q = q; 860 return block; 861 } 862 863 static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index) 864 { 865 struct tcf_net *tn = net_generic(net, tcf_net_id); 866 867 return idr_find(&tn->idr, block_index); 868 } 869 870 static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) 871 { 872 struct tcf_block *block; 873 874 rcu_read_lock(); 875 block = tcf_block_lookup(net, block_index); 876 if (block && !refcount_inc_not_zero(&block->refcnt)) 877 block = NULL; 878 rcu_read_unlock(); 879 880 return block; 881 } 882 883 static struct tcf_chain * 884 __tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) 885 { 886 mutex_lock(&block->lock); 887 if (chain) 888 chain = list_is_last(&chain->list, &block->chain_list) ? 889 NULL : list_next_entry(chain, list); 890 else 891 chain = list_first_entry_or_null(&block->chain_list, 892 struct tcf_chain, list); 893 894 /* skip all action-only chains */ 895 while (chain && tcf_chain_held_by_acts_only(chain)) 896 chain = list_is_last(&chain->list, &block->chain_list) ? 
897 NULL : list_next_entry(chain, list); 898 899 if (chain) 900 tcf_chain_hold(chain); 901 mutex_unlock(&block->lock); 902 903 return chain; 904 } 905 906 /* Function to be used by all clients that want to iterate over all chains on 907 * block. It properly obtains block->lock and takes reference to chain before 908 * returning it. Users of this function must be tolerant to concurrent chain 909 * insertion/deletion or ensure that no concurrent chain modification is 910 * possible. Note that all netlink dump callbacks cannot guarantee to provide 911 * consistent dump because rtnl lock is released each time skb is filled with 912 * data and sent to user-space. 913 */ 914 915 struct tcf_chain * 916 tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) 917 { 918 struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain); 919 920 if (chain) 921 tcf_chain_put(chain); 922 923 return chain_next; 924 } 925 EXPORT_SYMBOL(tcf_get_next_chain); 926 927 static struct tcf_proto * 928 __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) 929 { 930 u32 prio = 0; 931 932 ASSERT_RTNL(); 933 mutex_lock(&chain->filter_chain_lock); 934 935 if (!tp) { 936 tp = tcf_chain_dereference(chain->filter_chain, chain); 937 } else if (tcf_proto_is_deleting(tp)) { 938 /* 'deleting' flag is set and chain->filter_chain_lock was 939 * unlocked, which means next pointer could be invalid. Restart 940 * search. 941 */ 942 prio = tp->prio + 1; 943 tp = tcf_chain_dereference(chain->filter_chain, chain); 944 945 for (; tp; tp = tcf_chain_dereference(tp->next, chain)) 946 if (!tp->deleting && tp->prio >= prio) 947 break; 948 } else { 949 tp = tcf_chain_dereference(tp->next, chain); 950 } 951 952 if (tp) 953 tcf_proto_get(tp); 954 955 mutex_unlock(&chain->filter_chain_lock); 956 957 return tp; 958 } 959 960 /* Function to be used by all clients that want to iterate over all tp's on 961 * chain. Users of this function must be tolerant to concurrent tp 962 * insertion/deletion or ensure that no concurrent chain modification is 963 * possible. Note that all netlink dump callbacks cannot guarantee to provide 964 * consistent dump because rtnl lock is released each time skb is filled with 965 * data and sent to user-space. 966 */ 967 968 struct tcf_proto * 969 tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp, 970 bool rtnl_held) 971 { 972 struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp); 973 974 if (tp) 975 tcf_proto_put(tp, rtnl_held, NULL); 976 977 return tp_next; 978 } 979 EXPORT_SYMBOL(tcf_get_next_proto); 980 981 static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held) 982 { 983 struct tcf_chain *chain; 984 985 /* Last reference to block. At this point chains cannot be added or 986 * removed concurrently. 987 */ 988 for (chain = tcf_get_next_chain(block, NULL); 989 chain; 990 chain = tcf_get_next_chain(block, chain)) { 991 tcf_chain_put_explicitly_created(chain); 992 tcf_chain_flush(chain, rtnl_held); 993 } 994 } 995 996 /* Lookup Qdisc and increments its reference counter. 997 * Set parent, if necessary. 
998 */ 999 1000 static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, 1001 u32 *parent, int ifindex, bool rtnl_held, 1002 struct netlink_ext_ack *extack) 1003 { 1004 const struct Qdisc_class_ops *cops; 1005 struct net_device *dev; 1006 int err = 0; 1007 1008 if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) 1009 return 0; 1010 1011 rcu_read_lock(); 1012 1013 /* Find link */ 1014 dev = dev_get_by_index_rcu(net, ifindex); 1015 if (!dev) { 1016 rcu_read_unlock(); 1017 return -ENODEV; 1018 } 1019 1020 /* Find qdisc */ 1021 if (!*parent) { 1022 *q = dev->qdisc; 1023 *parent = (*q)->handle; 1024 } else { 1025 *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); 1026 if (!*q) { 1027 NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); 1028 err = -EINVAL; 1029 goto errout_rcu; 1030 } 1031 } 1032 1033 *q = qdisc_refcount_inc_nz(*q); 1034 if (!*q) { 1035 NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); 1036 err = -EINVAL; 1037 goto errout_rcu; 1038 } 1039 1040 /* Is it classful? */ 1041 cops = (*q)->ops->cl_ops; 1042 if (!cops) { 1043 NL_SET_ERR_MSG(extack, "Qdisc not classful"); 1044 err = -EINVAL; 1045 goto errout_qdisc; 1046 } 1047 1048 if (!cops->tcf_block) { 1049 NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); 1050 err = -EOPNOTSUPP; 1051 goto errout_qdisc; 1052 } 1053 1054 errout_rcu: 1055 /* At this point we know that qdisc is not noop_qdisc, 1056 * which means that qdisc holds a reference to net_device 1057 * and we hold a reference to qdisc, so it is safe to release 1058 * rcu read lock. 1059 */ 1060 rcu_read_unlock(); 1061 return err; 1062 1063 errout_qdisc: 1064 rcu_read_unlock(); 1065 1066 if (rtnl_held) 1067 qdisc_put(*q); 1068 else 1069 qdisc_put_unlocked(*q); 1070 *q = NULL; 1071 1072 return err; 1073 } 1074 1075 static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl, 1076 int ifindex, struct netlink_ext_ack *extack) 1077 { 1078 if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) 1079 return 0; 1080 1081 /* Do we search for filter, attached to class? */ 1082 if (TC_H_MIN(parent)) { 1083 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1084 1085 *cl = cops->find(q, parent); 1086 if (*cl == 0) { 1087 NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); 1088 return -ENOENT; 1089 } 1090 } 1091 1092 return 0; 1093 } 1094 1095 static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q, 1096 unsigned long cl, int ifindex, 1097 u32 block_index, 1098 struct netlink_ext_ack *extack) 1099 { 1100 struct tcf_block *block; 1101 1102 if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { 1103 block = tcf_block_refcnt_get(net, block_index); 1104 if (!block) { 1105 NL_SET_ERR_MSG(extack, "Block of given index was not found"); 1106 return ERR_PTR(-EINVAL); 1107 } 1108 } else { 1109 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1110 1111 block = cops->tcf_block(q, cl, extack); 1112 if (!block) 1113 return ERR_PTR(-EINVAL); 1114 1115 if (tcf_block_shared(block)) { 1116 NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); 1117 return ERR_PTR(-EOPNOTSUPP); 1118 } 1119 1120 /* Always take reference to block in order to support execution 1121 * of rules update path of cls API without rtnl lock. Caller 1122 * must release block when it is finished using it. 'if' block 1123 * of this conditional obtain reference to block by calling 1124 * tcf_block_refcnt_get(). 
1125 */ 1126 refcount_inc(&block->refcnt); 1127 } 1128 1129 return block; 1130 } 1131 1132 static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, 1133 struct tcf_block_ext_info *ei, bool rtnl_held) 1134 { 1135 if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) { 1136 /* Flushing/putting all chains will cause the block to be 1137 * deallocated when last chain is freed. However, if chain_list 1138 * is empty, block has to be manually deallocated. After block 1139 * reference counter reached 0, it is no longer possible to 1140 * increment it or add new chains to block. 1141 */ 1142 bool free_block = list_empty(&block->chain_list); 1143 1144 mutex_unlock(&block->lock); 1145 if (tcf_block_shared(block)) 1146 tcf_block_remove(block, block->net); 1147 1148 if (q) 1149 tcf_block_offload_unbind(block, q, ei); 1150 1151 if (free_block) 1152 tcf_block_destroy(block); 1153 else 1154 tcf_block_flush_all_chains(block, rtnl_held); 1155 } else if (q) { 1156 tcf_block_offload_unbind(block, q, ei); 1157 } 1158 } 1159 1160 static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held) 1161 { 1162 __tcf_block_put(block, NULL, NULL, rtnl_held); 1163 } 1164 1165 /* Find tcf block. 1166 * Set q, parent, cl when appropriate. 1167 */ 1168 1169 static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, 1170 u32 *parent, unsigned long *cl, 1171 int ifindex, u32 block_index, 1172 struct netlink_ext_ack *extack) 1173 { 1174 struct tcf_block *block; 1175 int err = 0; 1176 1177 ASSERT_RTNL(); 1178 1179 err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack); 1180 if (err) 1181 goto errout; 1182 1183 err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack); 1184 if (err) 1185 goto errout_qdisc; 1186 1187 block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack); 1188 if (IS_ERR(block)) { 1189 err = PTR_ERR(block); 1190 goto errout_qdisc; 1191 } 1192 1193 return block; 1194 1195 errout_qdisc: 1196 if (*q) 1197 qdisc_put(*q); 1198 errout: 1199 *q = NULL; 1200 return ERR_PTR(err); 1201 } 1202 1203 static void tcf_block_release(struct Qdisc *q, struct tcf_block *block, 1204 bool rtnl_held) 1205 { 1206 if (!IS_ERR_OR_NULL(block)) 1207 tcf_block_refcnt_put(block, rtnl_held); 1208 1209 if (q) { 1210 if (rtnl_held) 1211 qdisc_put(q); 1212 else 1213 qdisc_put_unlocked(q); 1214 } 1215 } 1216 1217 struct tcf_block_owner_item { 1218 struct list_head list; 1219 struct Qdisc *q; 1220 enum flow_block_binder_type binder_type; 1221 }; 1222 1223 static void 1224 tcf_block_owner_netif_keep_dst(struct tcf_block *block, 1225 struct Qdisc *q, 1226 enum flow_block_binder_type binder_type) 1227 { 1228 if (block->keep_dst && 1229 binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && 1230 binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) 1231 netif_keep_dst(qdisc_dev(q)); 1232 } 1233 1234 void tcf_block_netif_keep_dst(struct tcf_block *block) 1235 { 1236 struct tcf_block_owner_item *item; 1237 1238 block->keep_dst = true; 1239 list_for_each_entry(item, &block->owner_list, list) 1240 tcf_block_owner_netif_keep_dst(block, item->q, 1241 item->binder_type); 1242 } 1243 EXPORT_SYMBOL(tcf_block_netif_keep_dst); 1244 1245 static int tcf_block_owner_add(struct tcf_block *block, 1246 struct Qdisc *q, 1247 enum flow_block_binder_type binder_type) 1248 { 1249 struct tcf_block_owner_item *item; 1250 1251 item = kmalloc(sizeof(*item), GFP_KERNEL); 1252 if (!item) 1253 return -ENOMEM; 1254 item->q = q; 1255 item->binder_type = binder_type; 1256 list_add(&item->list, 
&block->owner_list); 1257 return 0; 1258 } 1259 1260 static void tcf_block_owner_del(struct tcf_block *block, 1261 struct Qdisc *q, 1262 enum flow_block_binder_type binder_type) 1263 { 1264 struct tcf_block_owner_item *item; 1265 1266 list_for_each_entry(item, &block->owner_list, list) { 1267 if (item->q == q && item->binder_type == binder_type) { 1268 list_del(&item->list); 1269 kfree(item); 1270 return; 1271 } 1272 } 1273 WARN_ON(1); 1274 } 1275 1276 int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, 1277 struct tcf_block_ext_info *ei, 1278 struct netlink_ext_ack *extack) 1279 { 1280 struct net *net = qdisc_net(q); 1281 struct tcf_block *block = NULL; 1282 int err; 1283 1284 if (ei->block_index) 1285 /* block_index not 0 means the shared block is requested */ 1286 block = tcf_block_refcnt_get(net, ei->block_index); 1287 1288 if (!block) { 1289 block = tcf_block_create(net, q, ei->block_index, extack); 1290 if (IS_ERR(block)) 1291 return PTR_ERR(block); 1292 if (tcf_block_shared(block)) { 1293 err = tcf_block_insert(block, net, extack); 1294 if (err) 1295 goto err_block_insert; 1296 } 1297 } 1298 1299 err = tcf_block_owner_add(block, q, ei->binder_type); 1300 if (err) 1301 goto err_block_owner_add; 1302 1303 tcf_block_owner_netif_keep_dst(block, q, ei->binder_type); 1304 1305 err = tcf_chain0_head_change_cb_add(block, ei, extack); 1306 if (err) 1307 goto err_chain0_head_change_cb_add; 1308 1309 err = tcf_block_offload_bind(block, q, ei, extack); 1310 if (err) 1311 goto err_block_offload_bind; 1312 1313 *p_block = block; 1314 return 0; 1315 1316 err_block_offload_bind: 1317 tcf_chain0_head_change_cb_del(block, ei); 1318 err_chain0_head_change_cb_add: 1319 tcf_block_owner_del(block, q, ei->binder_type); 1320 err_block_owner_add: 1321 err_block_insert: 1322 tcf_block_refcnt_put(block, true); 1323 return err; 1324 } 1325 EXPORT_SYMBOL(tcf_block_get_ext); 1326 1327 static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv) 1328 { 1329 struct tcf_proto __rcu **p_filter_chain = priv; 1330 1331 rcu_assign_pointer(*p_filter_chain, tp_head); 1332 } 1333 1334 int tcf_block_get(struct tcf_block **p_block, 1335 struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, 1336 struct netlink_ext_ack *extack) 1337 { 1338 struct tcf_block_ext_info ei = { 1339 .chain_head_change = tcf_chain_head_change_dflt, 1340 .chain_head_change_priv = p_filter_chain, 1341 }; 1342 1343 WARN_ON(!p_filter_chain); 1344 return tcf_block_get_ext(p_block, q, &ei, extack); 1345 } 1346 EXPORT_SYMBOL(tcf_block_get); 1347 1348 /* XXX: Standalone actions are not allowed to jump to any chain, and bound 1349 * actions should be all removed after flushing. 
1350 */ 1351 void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, 1352 struct tcf_block_ext_info *ei) 1353 { 1354 if (!block) 1355 return; 1356 tcf_chain0_head_change_cb_del(block, ei); 1357 tcf_block_owner_del(block, q, ei->binder_type); 1358 1359 __tcf_block_put(block, q, ei, true); 1360 } 1361 EXPORT_SYMBOL(tcf_block_put_ext); 1362 1363 void tcf_block_put(struct tcf_block *block) 1364 { 1365 struct tcf_block_ext_info ei = {0, }; 1366 1367 if (!block) 1368 return; 1369 tcf_block_put_ext(block, block->q, &ei); 1370 } 1371 1372 EXPORT_SYMBOL(tcf_block_put); 1373 1374 static int 1375 tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, 1376 void *cb_priv, bool add, bool offload_in_use, 1377 struct netlink_ext_ack *extack) 1378 { 1379 struct tcf_chain *chain, *chain_prev; 1380 struct tcf_proto *tp, *tp_prev; 1381 int err; 1382 1383 lockdep_assert_held(&block->cb_lock); 1384 1385 for (chain = __tcf_get_next_chain(block, NULL); 1386 chain; 1387 chain_prev = chain, 1388 chain = __tcf_get_next_chain(block, chain), 1389 tcf_chain_put(chain_prev)) { 1390 for (tp = __tcf_get_next_proto(chain, NULL); tp; 1391 tp_prev = tp, 1392 tp = __tcf_get_next_proto(chain, tp), 1393 tcf_proto_put(tp_prev, true, NULL)) { 1394 if (tp->ops->reoffload) { 1395 err = tp->ops->reoffload(tp, add, cb, cb_priv, 1396 extack); 1397 if (err && add) 1398 goto err_playback_remove; 1399 } else if (add && offload_in_use) { 1400 err = -EOPNOTSUPP; 1401 NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support"); 1402 goto err_playback_remove; 1403 } 1404 } 1405 } 1406 1407 return 0; 1408 1409 err_playback_remove: 1410 tcf_proto_put(tp, true, NULL); 1411 tcf_chain_put(chain); 1412 tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use, 1413 extack); 1414 return err; 1415 } 1416 1417 static int tcf_block_bind(struct tcf_block *block, 1418 struct flow_block_offload *bo) 1419 { 1420 struct flow_block_cb *block_cb, *next; 1421 int err, i = 0; 1422 1423 lockdep_assert_held(&block->cb_lock); 1424 1425 list_for_each_entry(block_cb, &bo->cb_list, list) { 1426 err = tcf_block_playback_offloads(block, block_cb->cb, 1427 block_cb->cb_priv, true, 1428 tcf_block_offload_in_use(block), 1429 bo->extack); 1430 if (err) 1431 goto err_unroll; 1432 if (!bo->unlocked_driver_cb) 1433 block->lockeddevcnt++; 1434 1435 i++; 1436 } 1437 list_splice(&bo->cb_list, &block->flow_block.cb_list); 1438 1439 return 0; 1440 1441 err_unroll: 1442 list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { 1443 if (i-- > 0) { 1444 list_del(&block_cb->list); 1445 tcf_block_playback_offloads(block, block_cb->cb, 1446 block_cb->cb_priv, false, 1447 tcf_block_offload_in_use(block), 1448 NULL); 1449 if (!bo->unlocked_driver_cb) 1450 block->lockeddevcnt--; 1451 } 1452 flow_block_cb_free(block_cb); 1453 } 1454 1455 return err; 1456 } 1457 1458 static void tcf_block_unbind(struct tcf_block *block, 1459 struct flow_block_offload *bo) 1460 { 1461 struct flow_block_cb *block_cb, *next; 1462 1463 lockdep_assert_held(&block->cb_lock); 1464 1465 list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { 1466 tcf_block_playback_offloads(block, block_cb->cb, 1467 block_cb->cb_priv, false, 1468 tcf_block_offload_in_use(block), 1469 NULL); 1470 list_del(&block_cb->list); 1471 flow_block_cb_free(block_cb); 1472 if (!bo->unlocked_driver_cb) 1473 block->lockeddevcnt--; 1474 } 1475 } 1476 1477 static int tcf_block_setup(struct tcf_block *block, 1478 struct flow_block_offload *bo) 1479 { 1480 int err; 
1481 1482 switch (bo->command) { 1483 case FLOW_BLOCK_BIND: 1484 err = tcf_block_bind(block, bo); 1485 break; 1486 case FLOW_BLOCK_UNBIND: 1487 err = 0; 1488 tcf_block_unbind(block, bo); 1489 break; 1490 default: 1491 WARN_ON_ONCE(1); 1492 err = -EOPNOTSUPP; 1493 } 1494 1495 return err; 1496 } 1497 1498 /* Main classifier routine: scans classifier chain attached 1499 * to this qdisc, (optionally) tests for protocol and asks 1500 * specific classifiers. 1501 */ 1502 int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, 1503 struct tcf_result *res, bool compat_mode) 1504 { 1505 #ifdef CONFIG_NET_CLS_ACT 1506 const int max_reclassify_loop = 4; 1507 const struct tcf_proto *orig_tp = tp; 1508 const struct tcf_proto *first_tp; 1509 int limit = 0; 1510 1511 reclassify: 1512 #endif 1513 for (; tp; tp = rcu_dereference_bh(tp->next)) { 1514 __be16 protocol = tc_skb_protocol(skb); 1515 int err; 1516 1517 if (tp->protocol != protocol && 1518 tp->protocol != htons(ETH_P_ALL)) 1519 continue; 1520 1521 err = tp->classify(skb, tp, res); 1522 #ifdef CONFIG_NET_CLS_ACT 1523 if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) { 1524 first_tp = orig_tp; 1525 goto reset; 1526 } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { 1527 first_tp = res->goto_tp; 1528 1529 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) 1530 { 1531 struct tc_skb_ext *ext; 1532 1533 ext = skb_ext_add(skb, TC_SKB_EXT); 1534 if (WARN_ON_ONCE(!ext)) 1535 return TC_ACT_SHOT; 1536 1537 ext->chain = err & TC_ACT_EXT_VAL_MASK; 1538 } 1539 #endif 1540 goto reset; 1541 } 1542 #endif 1543 if (err >= 0) 1544 return err; 1545 } 1546 1547 return TC_ACT_UNSPEC; /* signal: continue lookup */ 1548 #ifdef CONFIG_NET_CLS_ACT 1549 reset: 1550 if (unlikely(limit++ >= max_reclassify_loop)) { 1551 net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n", 1552 tp->chain->block->index, 1553 tp->prio & 0xffff, 1554 ntohs(tp->protocol)); 1555 return TC_ACT_SHOT; 1556 } 1557 1558 tp = first_tp; 1559 goto reclassify; 1560 #endif 1561 } 1562 EXPORT_SYMBOL(tcf_classify); 1563 1564 struct tcf_chain_info { 1565 struct tcf_proto __rcu **pprev; 1566 struct tcf_proto __rcu *next; 1567 }; 1568 1569 static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain, 1570 struct tcf_chain_info *chain_info) 1571 { 1572 return tcf_chain_dereference(*chain_info->pprev, chain); 1573 } 1574 1575 static int tcf_chain_tp_insert(struct tcf_chain *chain, 1576 struct tcf_chain_info *chain_info, 1577 struct tcf_proto *tp) 1578 { 1579 if (chain->flushing) 1580 return -EAGAIN; 1581 1582 if (*chain_info->pprev == chain->filter_chain) 1583 tcf_chain0_head_change(chain, tp); 1584 tcf_proto_get(tp); 1585 RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); 1586 rcu_assign_pointer(*chain_info->pprev, tp); 1587 1588 return 0; 1589 } 1590 1591 static void tcf_chain_tp_remove(struct tcf_chain *chain, 1592 struct tcf_chain_info *chain_info, 1593 struct tcf_proto *tp) 1594 { 1595 struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain); 1596 1597 tcf_proto_mark_delete(tp); 1598 if (tp == chain->filter_chain) 1599 tcf_chain0_head_change(chain, next); 1600 RCU_INIT_POINTER(*chain_info->pprev, next); 1601 } 1602 1603 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, 1604 struct tcf_chain_info *chain_info, 1605 u32 protocol, u32 prio, 1606 bool prio_allocate); 1607 1608 /* Try to insert new proto. 1609 * If proto with specified priority already exists, free new proto 1610 * and return existing one. 
1611 */ 1612 1613 static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, 1614 struct tcf_proto *tp_new, 1615 u32 protocol, u32 prio, 1616 bool rtnl_held) 1617 { 1618 struct tcf_chain_info chain_info; 1619 struct tcf_proto *tp; 1620 int err = 0; 1621 1622 mutex_lock(&chain->filter_chain_lock); 1623 1624 tp = tcf_chain_tp_find(chain, &chain_info, 1625 protocol, prio, false); 1626 if (!tp) 1627 err = tcf_chain_tp_insert(chain, &chain_info, tp_new); 1628 mutex_unlock(&chain->filter_chain_lock); 1629 1630 if (tp) { 1631 tcf_proto_destroy(tp_new, rtnl_held, NULL); 1632 tp_new = tp; 1633 } else if (err) { 1634 tcf_proto_destroy(tp_new, rtnl_held, NULL); 1635 tp_new = ERR_PTR(err); 1636 } 1637 1638 return tp_new; 1639 } 1640 1641 static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, 1642 struct tcf_proto *tp, bool rtnl_held, 1643 struct netlink_ext_ack *extack) 1644 { 1645 struct tcf_chain_info chain_info; 1646 struct tcf_proto *tp_iter; 1647 struct tcf_proto **pprev; 1648 struct tcf_proto *next; 1649 1650 mutex_lock(&chain->filter_chain_lock); 1651 1652 /* Atomically find and remove tp from chain. */ 1653 for (pprev = &chain->filter_chain; 1654 (tp_iter = tcf_chain_dereference(*pprev, chain)); 1655 pprev = &tp_iter->next) { 1656 if (tp_iter == tp) { 1657 chain_info.pprev = pprev; 1658 chain_info.next = tp_iter->next; 1659 WARN_ON(tp_iter->deleting); 1660 break; 1661 } 1662 } 1663 /* Verify that tp still exists and no new filters were inserted 1664 * concurrently. 1665 * Mark tp for deletion if it is empty. 1666 */ 1667 if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) { 1668 mutex_unlock(&chain->filter_chain_lock); 1669 return; 1670 } 1671 1672 next = tcf_chain_dereference(chain_info.next, chain); 1673 if (tp == chain->filter_chain) 1674 tcf_chain0_head_change(chain, next); 1675 RCU_INIT_POINTER(*chain_info.pprev, next); 1676 mutex_unlock(&chain->filter_chain_lock); 1677 1678 tcf_proto_put(tp, rtnl_held, extack); 1679 } 1680 1681 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, 1682 struct tcf_chain_info *chain_info, 1683 u32 protocol, u32 prio, 1684 bool prio_allocate) 1685 { 1686 struct tcf_proto **pprev; 1687 struct tcf_proto *tp; 1688 1689 /* Check the chain for existence of proto-tcf with this priority */ 1690 for (pprev = &chain->filter_chain; 1691 (tp = tcf_chain_dereference(*pprev, chain)); 1692 pprev = &tp->next) { 1693 if (tp->prio >= prio) { 1694 if (tp->prio == prio) { 1695 if (prio_allocate || 1696 (tp->protocol != protocol && protocol)) 1697 return ERR_PTR(-EINVAL); 1698 } else { 1699 tp = NULL; 1700 } 1701 break; 1702 } 1703 } 1704 chain_info->pprev = pprev; 1705 if (tp) { 1706 chain_info->next = tp->next; 1707 tcf_proto_get(tp); 1708 } else { 1709 chain_info->next = NULL; 1710 } 1711 return tp; 1712 } 1713 1714 static int tcf_fill_node(struct net *net, struct sk_buff *skb, 1715 struct tcf_proto *tp, struct tcf_block *block, 1716 struct Qdisc *q, u32 parent, void *fh, 1717 u32 portid, u32 seq, u16 flags, int event, 1718 bool rtnl_held) 1719 { 1720 struct tcmsg *tcm; 1721 struct nlmsghdr *nlh; 1722 unsigned char *b = skb_tail_pointer(skb); 1723 1724 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 1725 if (!nlh) 1726 goto out_nlmsg_trim; 1727 tcm = nlmsg_data(nlh); 1728 tcm->tcm_family = AF_UNSPEC; 1729 tcm->tcm__pad1 = 0; 1730 tcm->tcm__pad2 = 0; 1731 if (q) { 1732 tcm->tcm_ifindex = qdisc_dev(q)->ifindex; 1733 tcm->tcm_parent = parent; 1734 } else { 1735 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; 1736 
tcm->tcm_block_index = block->index; 1737 } 1738 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); 1739 if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) 1740 goto nla_put_failure; 1741 if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) 1742 goto nla_put_failure; 1743 if (!fh) { 1744 tcm->tcm_handle = 0; 1745 } else { 1746 if (tp->ops->dump && 1747 tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0) 1748 goto nla_put_failure; 1749 } 1750 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1751 return skb->len; 1752 1753 out_nlmsg_trim: 1754 nla_put_failure: 1755 nlmsg_trim(skb, b); 1756 return -1; 1757 } 1758 1759 static int tfilter_notify(struct net *net, struct sk_buff *oskb, 1760 struct nlmsghdr *n, struct tcf_proto *tp, 1761 struct tcf_block *block, struct Qdisc *q, 1762 u32 parent, void *fh, int event, bool unicast, 1763 bool rtnl_held) 1764 { 1765 struct sk_buff *skb; 1766 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 1767 int err = 0; 1768 1769 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1770 if (!skb) 1771 return -ENOBUFS; 1772 1773 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, 1774 n->nlmsg_seq, n->nlmsg_flags, event, 1775 rtnl_held) <= 0) { 1776 kfree_skb(skb); 1777 return -EINVAL; 1778 } 1779 1780 if (unicast) 1781 err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); 1782 else 1783 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1784 n->nlmsg_flags & NLM_F_ECHO); 1785 1786 if (err > 0) 1787 err = 0; 1788 return err; 1789 } 1790 1791 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, 1792 struct nlmsghdr *n, struct tcf_proto *tp, 1793 struct tcf_block *block, struct Qdisc *q, 1794 u32 parent, void *fh, bool unicast, bool *last, 1795 bool rtnl_held, struct netlink_ext_ack *extack) 1796 { 1797 struct sk_buff *skb; 1798 u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; 1799 int err; 1800 1801 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1802 if (!skb) 1803 return -ENOBUFS; 1804 1805 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, 1806 n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER, 1807 rtnl_held) <= 0) { 1808 NL_SET_ERR_MSG(extack, "Failed to build del event notification"); 1809 kfree_skb(skb); 1810 return -EINVAL; 1811 } 1812 1813 err = tp->ops->delete(tp, fh, last, rtnl_held, extack); 1814 if (err) { 1815 kfree_skb(skb); 1816 return err; 1817 } 1818 1819 if (unicast) 1820 err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); 1821 else 1822 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1823 n->nlmsg_flags & NLM_F_ECHO); 1824 if (err < 0) 1825 NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); 1826 1827 if (err > 0) 1828 err = 0; 1829 return err; 1830 } 1831 1832 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, 1833 struct tcf_block *block, struct Qdisc *q, 1834 u32 parent, struct nlmsghdr *n, 1835 struct tcf_chain *chain, int event, 1836 bool rtnl_held) 1837 { 1838 struct tcf_proto *tp; 1839 1840 for (tp = tcf_get_next_proto(chain, NULL, rtnl_held); 1841 tp; tp = tcf_get_next_proto(chain, tp, rtnl_held)) 1842 tfilter_notify(net, oskb, n, tp, block, 1843 q, parent, NULL, event, false, rtnl_held); 1844 } 1845 1846 static void tfilter_put(struct tcf_proto *tp, void *fh) 1847 { 1848 if (tp->ops->put && fh) 1849 tp->ops->put(tp, fh); 1850 } 1851 1852 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, 1853 struct netlink_ext_ack *extack) 1854 { 1855 struct net *net = sock_net(skb->sk); 1856 struct nlattr *tca[TCA_MAX + 1]; 1857 char name[IFNAMSIZ]; 1858 struct tcmsg *t; 1859 u32 protocol; 1860 u32 prio; 1861 bool prio_allocate; 1862 u32 parent; 1863 u32 chain_index; 1864 struct Qdisc *q = NULL; 1865 struct tcf_chain_info chain_info; 1866 struct tcf_chain *chain = NULL; 1867 struct tcf_block *block; 1868 struct tcf_proto *tp; 1869 unsigned long cl; 1870 void *fh; 1871 int err; 1872 int tp_created; 1873 bool rtnl_held = false; 1874 1875 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 1876 return -EPERM; 1877 1878 replay: 1879 tp_created = 0; 1880 1881 err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, 1882 rtm_tca_policy, extack); 1883 if (err < 0) 1884 return err; 1885 1886 t = nlmsg_data(n); 1887 protocol = TC_H_MIN(t->tcm_info); 1888 prio = TC_H_MAJ(t->tcm_info); 1889 prio_allocate = false; 1890 parent = t->tcm_parent; 1891 tp = NULL; 1892 cl = 0; 1893 block = NULL; 1894 1895 if (prio == 0) { 1896 /* If no priority is provided by the user, 1897 * we allocate one. 1898 */ 1899 if (n->nlmsg_flags & NLM_F_CREATE) { 1900 prio = TC_H_MAKE(0x80000000U, 0U); 1901 prio_allocate = true; 1902 } else { 1903 NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); 1904 return -ENOENT; 1905 } 1906 } 1907 1908 /* Find head of filter chain. */ 1909 1910 err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); 1911 if (err) 1912 return err; 1913 1914 if (tcf_proto_check_kind(tca[TCA_KIND], name)) { 1915 NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); 1916 err = -EINVAL; 1917 goto errout; 1918 } 1919 1920 /* Take rtnl mutex if rtnl_held was set to true on previous iteration, 1921 * block is shared (no qdisc found), qdisc is not unlocked, classifier 1922 * type is not specified, classifier is not unlocked. 
1923 */ 1924 if (rtnl_held || 1925 (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || 1926 !tcf_proto_is_unlocked(name)) { 1927 rtnl_held = true; 1928 rtnl_lock(); 1929 } 1930 1931 err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); 1932 if (err) 1933 goto errout; 1934 1935 block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, 1936 extack); 1937 if (IS_ERR(block)) { 1938 err = PTR_ERR(block); 1939 goto errout; 1940 } 1941 1942 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; 1943 if (chain_index > TC_ACT_EXT_VAL_MASK) { 1944 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); 1945 err = -EINVAL; 1946 goto errout; 1947 } 1948 chain = tcf_chain_get(block, chain_index, true); 1949 if (!chain) { 1950 NL_SET_ERR_MSG(extack, "Cannot create specified filter chain"); 1951 err = -ENOMEM; 1952 goto errout; 1953 } 1954 1955 mutex_lock(&chain->filter_chain_lock); 1956 tp = tcf_chain_tp_find(chain, &chain_info, protocol, 1957 prio, prio_allocate); 1958 if (IS_ERR(tp)) { 1959 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); 1960 err = PTR_ERR(tp); 1961 goto errout_locked; 1962 } 1963 1964 if (tp == NULL) { 1965 struct tcf_proto *tp_new = NULL; 1966 1967 if (chain->flushing) { 1968 err = -EAGAIN; 1969 goto errout_locked; 1970 } 1971 1972 /* Proto-tcf does not exist, create new one */ 1973 1974 if (tca[TCA_KIND] == NULL || !protocol) { 1975 NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified"); 1976 err = -EINVAL; 1977 goto errout_locked; 1978 } 1979 1980 if (!(n->nlmsg_flags & NLM_F_CREATE)) { 1981 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); 1982 err = -ENOENT; 1983 goto errout_locked; 1984 } 1985 1986 if (prio_allocate) 1987 prio = tcf_auto_prio(tcf_chain_tp_prev(chain, 1988 &chain_info)); 1989 1990 mutex_unlock(&chain->filter_chain_lock); 1991 tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]), 1992 protocol, prio, chain, rtnl_held, 1993 extack); 1994 if (IS_ERR(tp_new)) { 1995 err = PTR_ERR(tp_new); 1996 goto errout_tp; 1997 } 1998 1999 tp_created = 1; 2000 tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio, 2001 rtnl_held); 2002 if (IS_ERR(tp)) { 2003 err = PTR_ERR(tp); 2004 goto errout_tp; 2005 } 2006 } else { 2007 mutex_unlock(&chain->filter_chain_lock); 2008 } 2009 2010 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { 2011 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); 2012 err = -EINVAL; 2013 goto errout; 2014 } 2015 2016 fh = tp->ops->get(tp, t->tcm_handle); 2017 2018 if (!fh) { 2019 if (!(n->nlmsg_flags & NLM_F_CREATE)) { 2020 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); 2021 err = -ENOENT; 2022 goto errout; 2023 } 2024 } else if (n->nlmsg_flags & NLM_F_EXCL) { 2025 tfilter_put(tp, fh); 2026 NL_SET_ERR_MSG(extack, "Filter already exists"); 2027 err = -EEXIST; 2028 goto errout; 2029 } 2030 2031 if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) { 2032 NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind"); 2033 err = -EINVAL; 2034 goto errout; 2035 } 2036 2037 err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, 2038 n->nlmsg_flags & NLM_F_CREATE ? 
TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, 2039 rtnl_held, extack); 2040 if (err == 0) { 2041 tfilter_notify(net, skb, n, tp, block, q, parent, fh, 2042 RTM_NEWTFILTER, false, rtnl_held); 2043 tfilter_put(tp, fh); 2044 /* q pointer is NULL for shared blocks */ 2045 if (q) 2046 q->flags &= ~TCQ_F_CAN_BYPASS; 2047 } 2048 2049 errout: 2050 if (err && tp_created) 2051 tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL); 2052 errout_tp: 2053 if (chain) { 2054 if (tp && !IS_ERR(tp)) 2055 tcf_proto_put(tp, rtnl_held, NULL); 2056 if (!tp_created) 2057 tcf_chain_put(chain); 2058 } 2059 tcf_block_release(q, block, rtnl_held); 2060 2061 if (rtnl_held) 2062 rtnl_unlock(); 2063 2064 if (err == -EAGAIN) { 2065 /* Take rtnl lock in case EAGAIN is caused by concurrent flush 2066 * of target chain. 2067 */ 2068 rtnl_held = true; 2069 /* Replay the request. */ 2070 goto replay; 2071 } 2072 return err; 2073 2074 errout_locked: 2075 mutex_unlock(&chain->filter_chain_lock); 2076 goto errout; 2077 } 2078 2079 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, 2080 struct netlink_ext_ack *extack) 2081 { 2082 struct net *net = sock_net(skb->sk); 2083 struct nlattr *tca[TCA_MAX + 1]; 2084 char name[IFNAMSIZ]; 2085 struct tcmsg *t; 2086 u32 protocol; 2087 u32 prio; 2088 u32 parent; 2089 u32 chain_index; 2090 struct Qdisc *q = NULL; 2091 struct tcf_chain_info chain_info; 2092 struct tcf_chain *chain = NULL; 2093 struct tcf_block *block = NULL; 2094 struct tcf_proto *tp = NULL; 2095 unsigned long cl = 0; 2096 void *fh = NULL; 2097 int err; 2098 bool rtnl_held = false; 2099 2100 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 2101 return -EPERM; 2102 2103 err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, 2104 rtm_tca_policy, extack); 2105 if (err < 0) 2106 return err; 2107 2108 t = nlmsg_data(n); 2109 protocol = TC_H_MIN(t->tcm_info); 2110 prio = TC_H_MAJ(t->tcm_info); 2111 parent = t->tcm_parent; 2112 2113 if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) { 2114 NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set"); 2115 return -ENOENT; 2116 } 2117 2118 /* Find head of filter chain. */ 2119 2120 err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); 2121 if (err) 2122 return err; 2123 2124 if (tcf_proto_check_kind(tca[TCA_KIND], name)) { 2125 NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); 2126 err = -EINVAL; 2127 goto errout; 2128 } 2129 /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc 2130 * found), qdisc is not unlocked, classifier type is not specified, 2131 * classifier is not unlocked. 2132 */ 2133 if (!prio || 2134 (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || 2135 !tcf_proto_is_unlocked(name)) { 2136 rtnl_held = true; 2137 rtnl_lock(); 2138 } 2139 2140 err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); 2141 if (err) 2142 goto errout; 2143 2144 block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, 2145 extack); 2146 if (IS_ERR(block)) { 2147 err = PTR_ERR(block); 2148 goto errout; 2149 } 2150 2151 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; 2152 if (chain_index > TC_ACT_EXT_VAL_MASK) { 2153 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); 2154 err = -EINVAL; 2155 goto errout; 2156 } 2157 chain = tcf_chain_get(block, chain_index, false); 2158 if (!chain) { 2159 /* User requested flush on non-existent chain. Nothing to do, 2160 * so just return success. 
2161 */ 2162 if (prio == 0) { 2163 err = 0; 2164 goto errout; 2165 } 2166 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); 2167 err = -ENOENT; 2168 goto errout; 2169 } 2170 2171 if (prio == 0) { 2172 tfilter_notify_chain(net, skb, block, q, parent, n, 2173 chain, RTM_DELTFILTER, rtnl_held); 2174 tcf_chain_flush(chain, rtnl_held); 2175 err = 0; 2176 goto errout; 2177 } 2178 2179 mutex_lock(&chain->filter_chain_lock); 2180 tp = tcf_chain_tp_find(chain, &chain_info, protocol, 2181 prio, false); 2182 if (!tp || IS_ERR(tp)) { 2183 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); 2184 err = tp ? PTR_ERR(tp) : -ENOENT; 2185 goto errout_locked; 2186 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { 2187 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); 2188 err = -EINVAL; 2189 goto errout_locked; 2190 } else if (t->tcm_handle == 0) { 2191 tcf_chain_tp_remove(chain, &chain_info, tp); 2192 mutex_unlock(&chain->filter_chain_lock); 2193 2194 tcf_proto_put(tp, rtnl_held, NULL); 2195 tfilter_notify(net, skb, n, tp, block, q, parent, fh, 2196 RTM_DELTFILTER, false, rtnl_held); 2197 err = 0; 2198 goto errout; 2199 } 2200 mutex_unlock(&chain->filter_chain_lock); 2201 2202 fh = tp->ops->get(tp, t->tcm_handle); 2203 2204 if (!fh) { 2205 NL_SET_ERR_MSG(extack, "Specified filter handle not found"); 2206 err = -ENOENT; 2207 } else { 2208 bool last; 2209 2210 err = tfilter_del_notify(net, skb, n, tp, block, 2211 q, parent, fh, false, &last, 2212 rtnl_held, extack); 2213 2214 if (err) 2215 goto errout; 2216 if (last) 2217 tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack); 2218 } 2219 2220 errout: 2221 if (chain) { 2222 if (tp && !IS_ERR(tp)) 2223 tcf_proto_put(tp, rtnl_held, NULL); 2224 tcf_chain_put(chain); 2225 } 2226 tcf_block_release(q, block, rtnl_held); 2227 2228 if (rtnl_held) 2229 rtnl_unlock(); 2230 2231 return err; 2232 2233 errout_locked: 2234 mutex_unlock(&chain->filter_chain_lock); 2235 goto errout; 2236 } 2237 2238 static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, 2239 struct netlink_ext_ack *extack) 2240 { 2241 struct net *net = sock_net(skb->sk); 2242 struct nlattr *tca[TCA_MAX + 1]; 2243 char name[IFNAMSIZ]; 2244 struct tcmsg *t; 2245 u32 protocol; 2246 u32 prio; 2247 u32 parent; 2248 u32 chain_index; 2249 struct Qdisc *q = NULL; 2250 struct tcf_chain_info chain_info; 2251 struct tcf_chain *chain = NULL; 2252 struct tcf_block *block = NULL; 2253 struct tcf_proto *tp = NULL; 2254 unsigned long cl = 0; 2255 void *fh = NULL; 2256 int err; 2257 bool rtnl_held = false; 2258 2259 err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, 2260 rtm_tca_policy, extack); 2261 if (err < 0) 2262 return err; 2263 2264 t = nlmsg_data(n); 2265 protocol = TC_H_MIN(t->tcm_info); 2266 prio = TC_H_MAJ(t->tcm_info); 2267 parent = t->tcm_parent; 2268 2269 if (prio == 0) { 2270 NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero"); 2271 return -ENOENT; 2272 } 2273 2274 /* Find head of filter chain. */ 2275 2276 err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); 2277 if (err) 2278 return err; 2279 2280 if (tcf_proto_check_kind(tca[TCA_KIND], name)) { 2281 NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); 2282 err = -EINVAL; 2283 goto errout; 2284 } 2285 /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not 2286 * unlocked, classifier type is not specified, classifier is not 2287 * unlocked. 
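 * If any of these conditions holds, fall back to the rtnl-protected
 * slow path; otherwise the request is served with rtnl_held == false.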
2288 */ 2289 if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || 2290 !tcf_proto_is_unlocked(name)) { 2291 rtnl_held = true; 2292 rtnl_lock(); 2293 } 2294 2295 err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); 2296 if (err) 2297 goto errout; 2298 2299 block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, 2300 extack); 2301 if (IS_ERR(block)) { 2302 err = PTR_ERR(block); 2303 goto errout; 2304 } 2305 2306 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; 2307 if (chain_index > TC_ACT_EXT_VAL_MASK) { 2308 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); 2309 err = -EINVAL; 2310 goto errout; 2311 } 2312 chain = tcf_chain_get(block, chain_index, false); 2313 if (!chain) { 2314 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); 2315 err = -EINVAL; 2316 goto errout; 2317 } 2318 2319 mutex_lock(&chain->filter_chain_lock); 2320 tp = tcf_chain_tp_find(chain, &chain_info, protocol, 2321 prio, false); 2322 mutex_unlock(&chain->filter_chain_lock); 2323 if (!tp || IS_ERR(tp)) { 2324 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); 2325 err = tp ? PTR_ERR(tp) : -ENOENT; 2326 goto errout; 2327 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { 2328 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); 2329 err = -EINVAL; 2330 goto errout; 2331 } 2332 2333 fh = tp->ops->get(tp, t->tcm_handle); 2334 2335 if (!fh) { 2336 NL_SET_ERR_MSG(extack, "Specified filter handle not found"); 2337 err = -ENOENT; 2338 } else { 2339 err = tfilter_notify(net, skb, n, tp, block, q, parent, 2340 fh, RTM_NEWTFILTER, true, rtnl_held); 2341 if (err < 0) 2342 NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); 2343 } 2344 2345 tfilter_put(tp, fh); 2346 errout: 2347 if (chain) { 2348 if (tp && !IS_ERR(tp)) 2349 tcf_proto_put(tp, rtnl_held, NULL); 2350 tcf_chain_put(chain); 2351 } 2352 tcf_block_release(q, block, rtnl_held); 2353 2354 if (rtnl_held) 2355 rtnl_unlock(); 2356 2357 return err; 2358 } 2359 2360 struct tcf_dump_args { 2361 struct tcf_walker w; 2362 struct sk_buff *skb; 2363 struct netlink_callback *cb; 2364 struct tcf_block *block; 2365 struct Qdisc *q; 2366 u32 parent; 2367 }; 2368 2369 static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) 2370 { 2371 struct tcf_dump_args *a = (void *)arg; 2372 struct net *net = sock_net(a->skb->sk); 2373 2374 return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent, 2375 n, NETLINK_CB(a->cb->skb).portid, 2376 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, 2377 RTM_NEWTFILTER, true); 2378 } 2379 2380 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, 2381 struct sk_buff *skb, struct netlink_callback *cb, 2382 long index_start, long *p_index) 2383 { 2384 struct net *net = sock_net(skb->sk); 2385 struct tcf_block *block = chain->block; 2386 struct tcmsg *tcm = nlmsg_data(cb->nlh); 2387 struct tcf_proto *tp, *tp_prev; 2388 struct tcf_dump_args arg; 2389 2390 for (tp = __tcf_get_next_proto(chain, NULL); 2391 tp; 2392 tp_prev = tp, 2393 tp = __tcf_get_next_proto(chain, tp), 2394 tcf_proto_put(tp_prev, true, NULL), 2395 (*p_index)++) { 2396 if (*p_index < index_start) 2397 continue; 2398 if (TC_H_MAJ(tcm->tcm_info) && 2399 TC_H_MAJ(tcm->tcm_info) != tp->prio) 2400 continue; 2401 if (TC_H_MIN(tcm->tcm_info) && 2402 TC_H_MIN(tcm->tcm_info) != tp->protocol) 2403 continue; 2404 if (*p_index > index_start) 2405 memset(&cb->args[1], 0, 2406 sizeof(cb->args) - 
sizeof(cb->args[0])); 2407 if (cb->args[1] == 0) { 2408 if (tcf_fill_node(net, skb, tp, block, q, parent, NULL, 2409 NETLINK_CB(cb->skb).portid, 2410 cb->nlh->nlmsg_seq, NLM_F_MULTI, 2411 RTM_NEWTFILTER, true) <= 0) 2412 goto errout; 2413 cb->args[1] = 1; 2414 } 2415 if (!tp->ops->walk) 2416 continue; 2417 arg.w.fn = tcf_node_dump; 2418 arg.skb = skb; 2419 arg.cb = cb; 2420 arg.block = block; 2421 arg.q = q; 2422 arg.parent = parent; 2423 arg.w.stop = 0; 2424 arg.w.skip = cb->args[1] - 1; 2425 arg.w.count = 0; 2426 arg.w.cookie = cb->args[2]; 2427 tp->ops->walk(tp, &arg.w, true); 2428 cb->args[2] = arg.w.cookie; 2429 cb->args[1] = arg.w.count + 1; 2430 if (arg.w.stop) 2431 goto errout; 2432 } 2433 return true; 2434 2435 errout: 2436 tcf_proto_put(tp, true, NULL); 2437 return false; 2438 } 2439 2440 /* called with RTNL */ 2441 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) 2442 { 2443 struct tcf_chain *chain, *chain_prev; 2444 struct net *net = sock_net(skb->sk); 2445 struct nlattr *tca[TCA_MAX + 1]; 2446 struct Qdisc *q = NULL; 2447 struct tcf_block *block; 2448 struct tcmsg *tcm = nlmsg_data(cb->nlh); 2449 long index_start; 2450 long index; 2451 u32 parent; 2452 int err; 2453 2454 if (nlmsg_len(cb->nlh) < sizeof(*tcm)) 2455 return skb->len; 2456 2457 err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, 2458 NULL, cb->extack); 2459 if (err) 2460 return err; 2461 2462 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { 2463 block = tcf_block_refcnt_get(net, tcm->tcm_block_index); 2464 if (!block) 2465 goto out; 2466 /* If we work with block index, q is NULL and parent value 2467 * will never be used in the following code. The check 2468 * in tcf_fill_node prevents it. However, compiler does not 2469 * see that far, so set parent to zero to silence the warning 2470 * about parent being uninitialized. 
2471 */ 2472 parent = 0; 2473 } else { 2474 const struct Qdisc_class_ops *cops; 2475 struct net_device *dev; 2476 unsigned long cl = 0; 2477 2478 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 2479 if (!dev) 2480 return skb->len; 2481 2482 parent = tcm->tcm_parent; 2483 if (!parent) { 2484 q = dev->qdisc; 2485 parent = q->handle; 2486 } else { 2487 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); 2488 } 2489 if (!q) 2490 goto out; 2491 cops = q->ops->cl_ops; 2492 if (!cops) 2493 goto out; 2494 if (!cops->tcf_block) 2495 goto out; 2496 if (TC_H_MIN(tcm->tcm_parent)) { 2497 cl = cops->find(q, tcm->tcm_parent); 2498 if (cl == 0) 2499 goto out; 2500 } 2501 block = cops->tcf_block(q, cl, NULL); 2502 if (!block) 2503 goto out; 2504 if (tcf_block_shared(block)) 2505 q = NULL; 2506 } 2507 2508 index_start = cb->args[0]; 2509 index = 0; 2510 2511 for (chain = __tcf_get_next_chain(block, NULL); 2512 chain; 2513 chain_prev = chain, 2514 chain = __tcf_get_next_chain(block, chain), 2515 tcf_chain_put(chain_prev)) { 2516 if (tca[TCA_CHAIN] && 2517 nla_get_u32(tca[TCA_CHAIN]) != chain->index) 2518 continue; 2519 if (!tcf_chain_dump(chain, q, parent, skb, cb, 2520 index_start, &index)) { 2521 tcf_chain_put(chain); 2522 err = -EMSGSIZE; 2523 break; 2524 } 2525 } 2526 2527 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) 2528 tcf_block_refcnt_put(block, true); 2529 cb->args[0] = index; 2530 2531 out: 2532 /* If we did no progress, the error (EMSGSIZE) is real */ 2533 if (skb->len == 0 && err) 2534 return err; 2535 return skb->len; 2536 } 2537 2538 static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops, 2539 void *tmplt_priv, u32 chain_index, 2540 struct net *net, struct sk_buff *skb, 2541 struct tcf_block *block, 2542 u32 portid, u32 seq, u16 flags, int event) 2543 { 2544 unsigned char *b = skb_tail_pointer(skb); 2545 const struct tcf_proto_ops *ops; 2546 struct nlmsghdr *nlh; 2547 struct tcmsg *tcm; 2548 void *priv; 2549 2550 ops = tmplt_ops; 2551 priv = tmplt_priv; 2552 2553 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 2554 if (!nlh) 2555 goto out_nlmsg_trim; 2556 tcm = nlmsg_data(nlh); 2557 tcm->tcm_family = AF_UNSPEC; 2558 tcm->tcm__pad1 = 0; 2559 tcm->tcm__pad2 = 0; 2560 tcm->tcm_handle = 0; 2561 if (block->q) { 2562 tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex; 2563 tcm->tcm_parent = block->q->handle; 2564 } else { 2565 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK; 2566 tcm->tcm_block_index = block->index; 2567 } 2568 2569 if (nla_put_u32(skb, TCA_CHAIN, chain_index)) 2570 goto nla_put_failure; 2571 2572 if (ops) { 2573 if (nla_put_string(skb, TCA_KIND, ops->kind)) 2574 goto nla_put_failure; 2575 if (ops->tmplt_dump(skb, net, priv) < 0) 2576 goto nla_put_failure; 2577 } 2578 2579 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 2580 return skb->len; 2581 2582 out_nlmsg_trim: 2583 nla_put_failure: 2584 nlmsg_trim(skb, b); 2585 return -EMSGSIZE; 2586 } 2587 2588 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, 2589 u32 seq, u16 flags, int event, bool unicast) 2590 { 2591 u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; 2592 struct tcf_block *block = chain->block; 2593 struct net *net = block->net; 2594 struct sk_buff *skb; 2595 int err = 0; 2596 2597 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2598 if (!skb) 2599 return -ENOBUFS; 2600 2601 if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, 2602 chain->index, net, skb, block, portid, 2603 seq, flags, event) <= 0) { 2604 kfree_skb(skb); 2605 return -EINVAL; 2606 } 2607 2608 if (unicast) 2609 err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); 2610 else 2611 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, 2612 flags & NLM_F_ECHO); 2613 2614 if (err > 0) 2615 err = 0; 2616 return err; 2617 } 2618 2619 static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, 2620 void *tmplt_priv, u32 chain_index, 2621 struct tcf_block *block, struct sk_buff *oskb, 2622 u32 seq, u16 flags, bool unicast) 2623 { 2624 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 2625 struct net *net = block->net; 2626 struct sk_buff *skb; 2627 2628 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2629 if (!skb) 2630 return -ENOBUFS; 2631 2632 if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb, 2633 block, portid, seq, flags, RTM_DELCHAIN) <= 0) { 2634 kfree_skb(skb); 2635 return -EINVAL; 2636 } 2637 2638 if (unicast) 2639 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); 2640 2641 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); 2642 } 2643 2644 static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, 2645 struct nlattr **tca, 2646 struct netlink_ext_ack *extack) 2647 { 2648 const struct tcf_proto_ops *ops; 2649 void *tmplt_priv; 2650 2651 /* If kind is not set, user did not specify template. */ 2652 if (!tca[TCA_KIND]) 2653 return 0; 2654 2655 ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack); 2656 if (IS_ERR(ops)) 2657 return PTR_ERR(ops); 2658 if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { 2659 NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); 2660 return -EOPNOTSUPP; 2661 } 2662 2663 tmplt_priv = ops->tmplt_create(net, chain, tca, extack); 2664 if (IS_ERR(tmplt_priv)) { 2665 module_put(ops->owner); 2666 return PTR_ERR(tmplt_priv); 2667 } 2668 chain->tmplt_ops = ops; 2669 chain->tmplt_priv = tmplt_priv; 2670 return 0; 2671 } 2672 2673 static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, 2674 void *tmplt_priv) 2675 { 2676 /* If template ops are set, no work to do for us. 
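 * A NULL tmplt_ops means no template was ever configured on this
 * chain, so there is nothing to destroy and no module reference to
 * drop.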
*/ 2677 if (!tmplt_ops) 2678 return; 2679 2680 tmplt_ops->tmplt_destroy(tmplt_priv); 2681 module_put(tmplt_ops->owner); 2682 } 2683 2684 /* Add/delete/get a chain */ 2685 2686 static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, 2687 struct netlink_ext_ack *extack) 2688 { 2689 struct net *net = sock_net(skb->sk); 2690 struct nlattr *tca[TCA_MAX + 1]; 2691 struct tcmsg *t; 2692 u32 parent; 2693 u32 chain_index; 2694 struct Qdisc *q = NULL; 2695 struct tcf_chain *chain = NULL; 2696 struct tcf_block *block; 2697 unsigned long cl; 2698 int err; 2699 2700 if (n->nlmsg_type != RTM_GETCHAIN && 2701 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 2702 return -EPERM; 2703 2704 replay: 2705 err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, 2706 rtm_tca_policy, extack); 2707 if (err < 0) 2708 return err; 2709 2710 t = nlmsg_data(n); 2711 parent = t->tcm_parent; 2712 cl = 0; 2713 2714 block = tcf_block_find(net, &q, &parent, &cl, 2715 t->tcm_ifindex, t->tcm_block_index, extack); 2716 if (IS_ERR(block)) 2717 return PTR_ERR(block); 2718 2719 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; 2720 if (chain_index > TC_ACT_EXT_VAL_MASK) { 2721 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); 2722 err = -EINVAL; 2723 goto errout_block; 2724 } 2725 2726 mutex_lock(&block->lock); 2727 chain = tcf_chain_lookup(block, chain_index); 2728 if (n->nlmsg_type == RTM_NEWCHAIN) { 2729 if (chain) { 2730 if (tcf_chain_held_by_acts_only(chain)) { 2731 /* The chain exists only because there is 2732 * some action referencing it. 2733 */ 2734 tcf_chain_hold(chain); 2735 } else { 2736 NL_SET_ERR_MSG(extack, "Filter chain already exists"); 2737 err = -EEXIST; 2738 goto errout_block_locked; 2739 } 2740 } else { 2741 if (!(n->nlmsg_flags & NLM_F_CREATE)) { 2742 NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); 2743 err = -ENOENT; 2744 goto errout_block_locked; 2745 } 2746 chain = tcf_chain_create(block, chain_index); 2747 if (!chain) { 2748 NL_SET_ERR_MSG(extack, "Failed to create filter chain"); 2749 err = -ENOMEM; 2750 goto errout_block_locked; 2751 } 2752 } 2753 } else { 2754 if (!chain || tcf_chain_held_by_acts_only(chain)) { 2755 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); 2756 err = -EINVAL; 2757 goto errout_block_locked; 2758 } 2759 tcf_chain_hold(chain); 2760 } 2761 2762 if (n->nlmsg_type == RTM_NEWCHAIN) { 2763 /* Modifying chain requires holding parent block lock. In case 2764 * the chain was successfully added, take a reference to the 2765 * chain. This ensures that an empty chain does not disappear at 2766 * the end of this function. 2767 */ 2768 tcf_chain_hold(chain); 2769 chain->explicitly_created = true; 2770 } 2771 mutex_unlock(&block->lock); 2772 2773 switch (n->nlmsg_type) { 2774 case RTM_NEWCHAIN: 2775 err = tc_chain_tmplt_add(chain, net, tca, extack); 2776 if (err) { 2777 tcf_chain_put_explicitly_created(chain); 2778 goto errout; 2779 } 2780 2781 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, 2782 RTM_NEWCHAIN, false); 2783 break; 2784 case RTM_DELCHAIN: 2785 tfilter_notify_chain(net, skb, block, q, parent, n, 2786 chain, RTM_DELTFILTER, true); 2787 /* Flush the chain first as the user requested chain removal. */ 2788 tcf_chain_flush(chain, true); 2789 /* In case the chain was successfully deleted, put a reference 2790 * to the chain previously taken during addition. 
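 * (This is the hold paired with the tcf_chain_hold() done above when
 * explicitly_created was set on the chain.)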
2791 */ 2792 tcf_chain_put_explicitly_created(chain); 2793 break; 2794 case RTM_GETCHAIN: 2795 err = tc_chain_notify(chain, skb, n->nlmsg_seq, 2796 n->nlmsg_seq, n->nlmsg_type, true); 2797 if (err < 0) 2798 NL_SET_ERR_MSG(extack, "Failed to send chain notify message"); 2799 break; 2800 default: 2801 err = -EOPNOTSUPP; 2802 NL_SET_ERR_MSG(extack, "Unsupported message type"); 2803 goto errout; 2804 } 2805 2806 errout: 2807 tcf_chain_put(chain); 2808 errout_block: 2809 tcf_block_release(q, block, true); 2810 if (err == -EAGAIN) 2811 /* Replay the request. */ 2812 goto replay; 2813 return err; 2814 2815 errout_block_locked: 2816 mutex_unlock(&block->lock); 2817 goto errout_block; 2818 } 2819 2820 /* called with RTNL */ 2821 static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) 2822 { 2823 struct net *net = sock_net(skb->sk); 2824 struct nlattr *tca[TCA_MAX + 1]; 2825 struct Qdisc *q = NULL; 2826 struct tcf_block *block; 2827 struct tcmsg *tcm = nlmsg_data(cb->nlh); 2828 struct tcf_chain *chain; 2829 long index_start; 2830 long index; 2831 u32 parent; 2832 int err; 2833 2834 if (nlmsg_len(cb->nlh) < sizeof(*tcm)) 2835 return skb->len; 2836 2837 err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX, 2838 rtm_tca_policy, cb->extack); 2839 if (err) 2840 return err; 2841 2842 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) { 2843 block = tcf_block_refcnt_get(net, tcm->tcm_block_index); 2844 if (!block) 2845 goto out; 2846 /* If we work with block index, q is NULL and parent value 2847 * will never be used in the following code. The check 2848 * in tcf_fill_node prevents it. However, compiler does not 2849 * see that far, so set parent to zero to silence the warning 2850 * about parent being uninitialized. 2851 */ 2852 parent = 0; 2853 } else { 2854 const struct Qdisc_class_ops *cops; 2855 struct net_device *dev; 2856 unsigned long cl = 0; 2857 2858 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 2859 if (!dev) 2860 return skb->len; 2861 2862 parent = tcm->tcm_parent; 2863 if (!parent) { 2864 q = dev->qdisc; 2865 parent = q->handle; 2866 } else { 2867 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); 2868 } 2869 if (!q) 2870 goto out; 2871 cops = q->ops->cl_ops; 2872 if (!cops) 2873 goto out; 2874 if (!cops->tcf_block) 2875 goto out; 2876 if (TC_H_MIN(tcm->tcm_parent)) { 2877 cl = cops->find(q, tcm->tcm_parent); 2878 if (cl == 0) 2879 goto out; 2880 } 2881 block = cops->tcf_block(q, cl, NULL); 2882 if (!block) 2883 goto out; 2884 if (tcf_block_shared(block)) 2885 q = NULL; 2886 } 2887 2888 index_start = cb->args[0]; 2889 index = 0; 2890 2891 mutex_lock(&block->lock); 2892 list_for_each_entry(chain, &block->chain_list, list) { 2893 if ((tca[TCA_CHAIN] && 2894 nla_get_u32(tca[TCA_CHAIN]) != chain->index)) 2895 continue; 2896 if (index < index_start) { 2897 index++; 2898 continue; 2899 } 2900 if (tcf_chain_held_by_acts_only(chain)) 2901 continue; 2902 err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, 2903 chain->index, net, skb, block, 2904 NETLINK_CB(cb->skb).portid, 2905 cb->nlh->nlmsg_seq, NLM_F_MULTI, 2906 RTM_NEWCHAIN); 2907 if (err <= 0) 2908 break; 2909 index++; 2910 } 2911 mutex_unlock(&block->lock); 2912 2913 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) 2914 tcf_block_refcnt_put(block, true); 2915 cb->args[0] = index; 2916 2917 out: 2918 /* If we did no progress, the error (EMSGSIZE) is real */ 2919 if (skb->len == 0 && err) 2920 return err; 2921 return skb->len; 2922 } 2923 2924 void tcf_exts_destroy(struct tcf_exts *exts) 2925 { 2926 #ifdef 
CONFIG_NET_CLS_ACT 2927 if (exts->actions) { 2928 tcf_action_destroy(exts->actions, TCA_ACT_UNBIND); 2929 kfree(exts->actions); 2930 } 2931 exts->nr_actions = 0; 2932 #endif 2933 } 2934 EXPORT_SYMBOL(tcf_exts_destroy); 2935 2936 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, 2937 struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr, 2938 bool rtnl_held, struct netlink_ext_ack *extack) 2939 { 2940 #ifdef CONFIG_NET_CLS_ACT 2941 { 2942 struct tc_action *act; 2943 size_t attr_size = 0; 2944 2945 if (exts->police && tb[exts->police]) { 2946 act = tcf_action_init_1(net, tp, tb[exts->police], 2947 rate_tlv, "police", ovr, 2948 TCA_ACT_BIND, rtnl_held, 2949 extack); 2950 if (IS_ERR(act)) 2951 return PTR_ERR(act); 2952 2953 act->type = exts->type = TCA_OLD_COMPAT; 2954 exts->actions[0] = act; 2955 exts->nr_actions = 1; 2956 } else if (exts->action && tb[exts->action]) { 2957 int err; 2958 2959 err = tcf_action_init(net, tp, tb[exts->action], 2960 rate_tlv, NULL, ovr, TCA_ACT_BIND, 2961 exts->actions, &attr_size, 2962 rtnl_held, extack); 2963 if (err < 0) 2964 return err; 2965 exts->nr_actions = err; 2966 } 2967 } 2968 #else 2969 if ((exts->action && tb[exts->action]) || 2970 (exts->police && tb[exts->police])) { 2971 NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)"); 2972 return -EOPNOTSUPP; 2973 } 2974 #endif 2975 2976 return 0; 2977 } 2978 EXPORT_SYMBOL(tcf_exts_validate); 2979 2980 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src) 2981 { 2982 #ifdef CONFIG_NET_CLS_ACT 2983 struct tcf_exts old = *dst; 2984 2985 *dst = *src; 2986 tcf_exts_destroy(&old); 2987 #endif 2988 } 2989 EXPORT_SYMBOL(tcf_exts_change); 2990 2991 #ifdef CONFIG_NET_CLS_ACT 2992 static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts) 2993 { 2994 if (exts->nr_actions == 0) 2995 return NULL; 2996 else 2997 return exts->actions[0]; 2998 } 2999 #endif 3000 3001 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) 3002 { 3003 #ifdef CONFIG_NET_CLS_ACT 3004 struct nlattr *nest; 3005 3006 if (exts->action && tcf_exts_has_actions(exts)) { 3007 /* 3008 * again for backward compatible mode - we want 3009 * to work with both old and new modes of entering 3010 * tc data even if iproute2 was newer - jhs 3011 */ 3012 if (exts->type != TCA_OLD_COMPAT) { 3013 nest = nla_nest_start_noflag(skb, exts->action); 3014 if (nest == NULL) 3015 goto nla_put_failure; 3016 3017 if (tcf_action_dump(skb, exts->actions, 0, 0) < 0) 3018 goto nla_put_failure; 3019 nla_nest_end(skb, nest); 3020 } else if (exts->police) { 3021 struct tc_action *act = tcf_exts_first_act(exts); 3022 nest = nla_nest_start_noflag(skb, exts->police); 3023 if (nest == NULL || !act) 3024 goto nla_put_failure; 3025 if (tcf_action_dump_old(skb, act, 0, 0) < 0) 3026 goto nla_put_failure; 3027 nla_nest_end(skb, nest); 3028 } 3029 } 3030 return 0; 3031 3032 nla_put_failure: 3033 nla_nest_cancel(skb, nest); 3034 return -1; 3035 #else 3036 return 0; 3037 #endif 3038 } 3039 EXPORT_SYMBOL(tcf_exts_dump); 3040 3041 3042 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) 3043 { 3044 #ifdef CONFIG_NET_CLS_ACT 3045 struct tc_action *a = tcf_exts_first_act(exts); 3046 if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0) 3047 return -1; 3048 #endif 3049 return 0; 3050 } 3051 EXPORT_SYMBOL(tcf_exts_dump_stats); 3052 3053 static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) 3054 { 3055 if (*flags & TCA_CLS_FLAGS_IN_HW) 3056 return; 3057 
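/* Not yet marked as offloaded: set TCA_CLS_FLAGS_IN_HW and bump the
 * per-block offload counter.
 */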
*flags |= TCA_CLS_FLAGS_IN_HW; 3058 atomic_inc(&block->offloadcnt); 3059 } 3060 3061 static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) 3062 { 3063 if (!(*flags & TCA_CLS_FLAGS_IN_HW)) 3064 return; 3065 *flags &= ~TCA_CLS_FLAGS_IN_HW; 3066 atomic_dec(&block->offloadcnt); 3067 } 3068 3069 static void tc_cls_offload_cnt_update(struct tcf_block *block, 3070 struct tcf_proto *tp, u32 *cnt, 3071 u32 *flags, u32 diff, bool add) 3072 { 3073 lockdep_assert_held(&block->cb_lock); 3074 3075 spin_lock(&tp->lock); 3076 if (add) { 3077 if (!*cnt) 3078 tcf_block_offload_inc(block, flags); 3079 *cnt += diff; 3080 } else { 3081 *cnt -= diff; 3082 if (!*cnt) 3083 tcf_block_offload_dec(block, flags); 3084 } 3085 spin_unlock(&tp->lock); 3086 } 3087 3088 static void 3089 tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp, 3090 u32 *cnt, u32 *flags) 3091 { 3092 lockdep_assert_held(&block->cb_lock); 3093 3094 spin_lock(&tp->lock); 3095 tcf_block_offload_dec(block, flags); 3096 *cnt = 0; 3097 spin_unlock(&tp->lock); 3098 } 3099 3100 static int 3101 __tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, 3102 void *type_data, bool err_stop) 3103 { 3104 struct flow_block_cb *block_cb; 3105 int ok_count = 0; 3106 int err; 3107 3108 list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { 3109 err = block_cb->cb(type, type_data, block_cb->cb_priv); 3110 if (err) { 3111 if (err_stop) 3112 return err; 3113 } else { 3114 ok_count++; 3115 } 3116 } 3117 return ok_count; 3118 } 3119 3120 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, 3121 void *type_data, bool err_stop, bool rtnl_held) 3122 { 3123 bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; 3124 int ok_count; 3125 3126 retry: 3127 if (take_rtnl) 3128 rtnl_lock(); 3129 down_read(&block->cb_lock); 3130 /* Need to obtain rtnl lock if block is bound to devs that require it. 3131 * In block bind code cb_lock is obtained while holding rtnl, so we must 3132 * obtain the locks in same order here. 3133 */ 3134 if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { 3135 up_read(&block->cb_lock); 3136 take_rtnl = true; 3137 goto retry; 3138 } 3139 3140 ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); 3141 3142 up_read(&block->cb_lock); 3143 if (take_rtnl) 3144 rtnl_unlock(); 3145 return ok_count; 3146 } 3147 EXPORT_SYMBOL(tc_setup_cb_call); 3148 3149 /* Non-destructive filter add. If filter that wasn't already in hardware is 3150 * successfully offloaded, increment block offloads counter. On failure, 3151 * previously offloaded filter is considered to be intact and offloads counter 3152 * is not decremented. 3153 */ 3154 3155 int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, 3156 enum tc_setup_type type, void *type_data, bool err_stop, 3157 u32 *flags, unsigned int *in_hw_count, bool rtnl_held) 3158 { 3159 bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; 3160 int ok_count; 3161 3162 retry: 3163 if (take_rtnl) 3164 rtnl_lock(); 3165 down_read(&block->cb_lock); 3166 /* Need to obtain rtnl lock if block is bound to devs that require it. 3167 * In block bind code cb_lock is obtained while holding rtnl, so we must 3168 * obtain the locks in same order here. 3169 */ 3170 if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { 3171 up_read(&block->cb_lock); 3172 take_rtnl = true; 3173 goto retry; 3174 } 3175 3176 /* Make sure all netdevs sharing this block are offload-capable. 
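 * If some bound device cannot offload (nooffloaddevcnt != 0) and the
 * caller asked to stop on errors, as callers typically do for skip_sw
 * filters, fail the add instead of silently falling back to software.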
*/ 3177 if (block->nooffloaddevcnt && err_stop) { 3178 ok_count = -EOPNOTSUPP; 3179 goto err_unlock; 3180 } 3181 3182 ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); 3183 if (ok_count < 0) 3184 goto err_unlock; 3185 3186 if (tp->ops->hw_add) 3187 tp->ops->hw_add(tp, type_data); 3188 if (ok_count > 0) 3189 tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 3190 ok_count, true); 3191 err_unlock: 3192 up_read(&block->cb_lock); 3193 if (take_rtnl) 3194 rtnl_unlock(); 3195 return ok_count < 0 ? ok_count : 0; 3196 } 3197 EXPORT_SYMBOL(tc_setup_cb_add); 3198 3199 /* Destructive filter replace. If filter that wasn't already in hardware is 3200 * successfully offloaded, increment block offload counter. On failure, 3201 * previously offloaded filter is considered to be destroyed and offload counter 3202 * is decremented. 3203 */ 3204 3205 int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, 3206 enum tc_setup_type type, void *type_data, bool err_stop, 3207 u32 *old_flags, unsigned int *old_in_hw_count, 3208 u32 *new_flags, unsigned int *new_in_hw_count, 3209 bool rtnl_held) 3210 { 3211 bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; 3212 int ok_count; 3213 3214 retry: 3215 if (take_rtnl) 3216 rtnl_lock(); 3217 down_read(&block->cb_lock); 3218 /* Need to obtain rtnl lock if block is bound to devs that require it. 3219 * In block bind code cb_lock is obtained while holding rtnl, so we must 3220 * obtain the locks in same order here. 3221 */ 3222 if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { 3223 up_read(&block->cb_lock); 3224 take_rtnl = true; 3225 goto retry; 3226 } 3227 3228 /* Make sure all netdevs sharing this block are offload-capable. */ 3229 if (block->nooffloaddevcnt && err_stop) { 3230 ok_count = -EOPNOTSUPP; 3231 goto err_unlock; 3232 } 3233 3234 tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); 3235 if (tp->ops->hw_del) 3236 tp->ops->hw_del(tp, type_data); 3237 3238 ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); 3239 if (ok_count < 0) 3240 goto err_unlock; 3241 3242 if (tp->ops->hw_add) 3243 tp->ops->hw_add(tp, type_data); 3244 if (ok_count > 0) 3245 tc_cls_offload_cnt_update(block, tp, new_in_hw_count, 3246 new_flags, ok_count, true); 3247 err_unlock: 3248 up_read(&block->cb_lock); 3249 if (take_rtnl) 3250 rtnl_unlock(); 3251 return ok_count < 0 ? ok_count : 0; 3252 } 3253 EXPORT_SYMBOL(tc_setup_cb_replace); 3254 3255 /* Destroy filter and decrement block offload counter, if filter was previously 3256 * offloaded. 3257 */ 3258 3259 int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, 3260 enum tc_setup_type type, void *type_data, bool err_stop, 3261 u32 *flags, unsigned int *in_hw_count, bool rtnl_held) 3262 { 3263 bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; 3264 int ok_count; 3265 3266 retry: 3267 if (take_rtnl) 3268 rtnl_lock(); 3269 down_read(&block->cb_lock); 3270 /* Need to obtain rtnl lock if block is bound to devs that require it. 3271 * In block bind code cb_lock is obtained while holding rtnl, so we must 3272 * obtain the locks in same order here. 
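 * (block->lockeddevcnt is the number of bound devices whose callbacks
 * still require rtnl protection.)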
3273 */ 3274 if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { 3275 up_read(&block->cb_lock); 3276 take_rtnl = true; 3277 goto retry; 3278 } 3279 3280 ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); 3281 3282 tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); 3283 if (tp->ops->hw_del) 3284 tp->ops->hw_del(tp, type_data); 3285 3286 up_read(&block->cb_lock); 3287 if (take_rtnl) 3288 rtnl_unlock(); 3289 return ok_count < 0 ? ok_count : 0; 3290 } 3291 EXPORT_SYMBOL(tc_setup_cb_destroy); 3292 3293 int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, 3294 bool add, flow_setup_cb_t *cb, 3295 enum tc_setup_type type, void *type_data, 3296 void *cb_priv, u32 *flags, unsigned int *in_hw_count) 3297 { 3298 int err = cb(type, type_data, cb_priv); 3299 3300 if (err) { 3301 if (add && tc_skip_sw(*flags)) 3302 return err; 3303 } else { 3304 tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1, 3305 add); 3306 } 3307 3308 return 0; 3309 } 3310 EXPORT_SYMBOL(tc_setup_cb_reoffload); 3311 3312 void tc_cleanup_flow_action(struct flow_action *flow_action) 3313 { 3314 struct flow_action_entry *entry; 3315 int i; 3316 3317 flow_action_for_each(i, entry, flow_action) 3318 if (entry->destructor) 3319 entry->destructor(entry->destructor_priv); 3320 } 3321 EXPORT_SYMBOL(tc_cleanup_flow_action); 3322 3323 static void tcf_mirred_get_dev(struct flow_action_entry *entry, 3324 const struct tc_action *act) 3325 { 3326 #ifdef CONFIG_NET_CLS_ACT 3327 entry->dev = act->ops->get_dev(act, &entry->destructor); 3328 if (!entry->dev) 3329 return; 3330 entry->destructor_priv = entry->dev; 3331 #endif 3332 } 3333 3334 static void tcf_tunnel_encap_put_tunnel(void *priv) 3335 { 3336 struct ip_tunnel_info *tunnel = priv; 3337 3338 kfree(tunnel); 3339 } 3340 3341 static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, 3342 const struct tc_action *act) 3343 { 3344 entry->tunnel = tcf_tunnel_info_copy(act); 3345 if (!entry->tunnel) 3346 return -ENOMEM; 3347 entry->destructor = tcf_tunnel_encap_put_tunnel; 3348 entry->destructor_priv = entry->tunnel; 3349 return 0; 3350 } 3351 3352 static void tcf_sample_get_group(struct flow_action_entry *entry, 3353 const struct tc_action *act) 3354 { 3355 #ifdef CONFIG_NET_CLS_ACT 3356 entry->sample.psample_group = 3357 act->ops->get_psample_group(act, &entry->destructor); 3358 entry->destructor_priv = entry->sample.psample_group; 3359 #endif 3360 } 3361 3362 int tc_setup_flow_action(struct flow_action *flow_action, 3363 const struct tcf_exts *exts, bool rtnl_held) 3364 { 3365 const struct tc_action *act; 3366 int i, j, k, err = 0; 3367 3368 if (!exts) 3369 return 0; 3370 3371 if (!rtnl_held) 3372 rtnl_lock(); 3373 3374 j = 0; 3375 tcf_exts_for_each_action(i, act, exts) { 3376 struct flow_action_entry *entry; 3377 3378 entry = &flow_action->entries[j]; 3379 if (is_tcf_gact_ok(act)) { 3380 entry->id = FLOW_ACTION_ACCEPT; 3381 } else if (is_tcf_gact_shot(act)) { 3382 entry->id = FLOW_ACTION_DROP; 3383 } else if (is_tcf_gact_trap(act)) { 3384 entry->id = FLOW_ACTION_TRAP; 3385 } else if (is_tcf_gact_goto_chain(act)) { 3386 entry->id = FLOW_ACTION_GOTO; 3387 entry->chain_index = tcf_gact_goto_chain_index(act); 3388 } else if (is_tcf_mirred_egress_redirect(act)) { 3389 entry->id = FLOW_ACTION_REDIRECT; 3390 tcf_mirred_get_dev(entry, act); 3391 } else if (is_tcf_mirred_egress_mirror(act)) { 3392 entry->id = FLOW_ACTION_MIRRED; 3393 tcf_mirred_get_dev(entry, act); 3394 } else if (is_tcf_mirred_ingress_redirect(act)) { 3395 entry->id = 
FLOW_ACTION_REDIRECT_INGRESS; 3396 tcf_mirred_get_dev(entry, act); 3397 } else if (is_tcf_mirred_ingress_mirror(act)) { 3398 entry->id = FLOW_ACTION_MIRRED_INGRESS; 3399 tcf_mirred_get_dev(entry, act); 3400 } else if (is_tcf_vlan(act)) { 3401 switch (tcf_vlan_action(act)) { 3402 case TCA_VLAN_ACT_PUSH: 3403 entry->id = FLOW_ACTION_VLAN_PUSH; 3404 entry->vlan.vid = tcf_vlan_push_vid(act); 3405 entry->vlan.proto = tcf_vlan_push_proto(act); 3406 entry->vlan.prio = tcf_vlan_push_prio(act); 3407 break; 3408 case TCA_VLAN_ACT_POP: 3409 entry->id = FLOW_ACTION_VLAN_POP; 3410 break; 3411 case TCA_VLAN_ACT_MODIFY: 3412 entry->id = FLOW_ACTION_VLAN_MANGLE; 3413 entry->vlan.vid = tcf_vlan_push_vid(act); 3414 entry->vlan.proto = tcf_vlan_push_proto(act); 3415 entry->vlan.prio = tcf_vlan_push_prio(act); 3416 break; 3417 default: 3418 err = -EOPNOTSUPP; 3419 goto err_out; 3420 } 3421 } else if (is_tcf_tunnel_set(act)) { 3422 entry->id = FLOW_ACTION_TUNNEL_ENCAP; 3423 err = tcf_tunnel_encap_get_tunnel(entry, act); 3424 if (err) 3425 goto err_out; 3426 } else if (is_tcf_tunnel_release(act)) { 3427 entry->id = FLOW_ACTION_TUNNEL_DECAP; 3428 } else if (is_tcf_pedit(act)) { 3429 for (k = 0; k < tcf_pedit_nkeys(act); k++) { 3430 switch (tcf_pedit_cmd(act, k)) { 3431 case TCA_PEDIT_KEY_EX_CMD_SET: 3432 entry->id = FLOW_ACTION_MANGLE; 3433 break; 3434 case TCA_PEDIT_KEY_EX_CMD_ADD: 3435 entry->id = FLOW_ACTION_ADD; 3436 break; 3437 default: 3438 err = -EOPNOTSUPP; 3439 goto err_out; 3440 } 3441 entry->mangle.htype = tcf_pedit_htype(act, k); 3442 entry->mangle.mask = tcf_pedit_mask(act, k); 3443 entry->mangle.val = tcf_pedit_val(act, k); 3444 entry->mangle.offset = tcf_pedit_offset(act, k); 3445 entry = &flow_action->entries[++j]; 3446 } 3447 } else if (is_tcf_csum(act)) { 3448 entry->id = FLOW_ACTION_CSUM; 3449 entry->csum_flags = tcf_csum_update_flags(act); 3450 } else if (is_tcf_skbedit_mark(act)) { 3451 entry->id = FLOW_ACTION_MARK; 3452 entry->mark = tcf_skbedit_mark(act); 3453 } else if (is_tcf_sample(act)) { 3454 entry->id = FLOW_ACTION_SAMPLE; 3455 entry->sample.trunc_size = tcf_sample_trunc_size(act); 3456 entry->sample.truncate = tcf_sample_truncate(act); 3457 entry->sample.rate = tcf_sample_rate(act); 3458 tcf_sample_get_group(entry, act); 3459 } else if (is_tcf_police(act)) { 3460 entry->id = FLOW_ACTION_POLICE; 3461 entry->police.burst = tcf_police_tcfp_burst(act); 3462 entry->police.rate_bytes_ps = 3463 tcf_police_rate_bytes_ps(act); 3464 } else if (is_tcf_ct(act)) { 3465 entry->id = FLOW_ACTION_CT; 3466 entry->ct.action = tcf_ct_action(act); 3467 entry->ct.zone = tcf_ct_zone(act); 3468 } else if (is_tcf_mpls(act)) { 3469 switch (tcf_mpls_action(act)) { 3470 case TCA_MPLS_ACT_PUSH: 3471 entry->id = FLOW_ACTION_MPLS_PUSH; 3472 entry->mpls_push.proto = tcf_mpls_proto(act); 3473 entry->mpls_push.label = tcf_mpls_label(act); 3474 entry->mpls_push.tc = tcf_mpls_tc(act); 3475 entry->mpls_push.bos = tcf_mpls_bos(act); 3476 entry->mpls_push.ttl = tcf_mpls_ttl(act); 3477 break; 3478 case TCA_MPLS_ACT_POP: 3479 entry->id = FLOW_ACTION_MPLS_POP; 3480 entry->mpls_pop.proto = tcf_mpls_proto(act); 3481 break; 3482 case TCA_MPLS_ACT_MODIFY: 3483 entry->id = FLOW_ACTION_MPLS_MANGLE; 3484 entry->mpls_mangle.label = tcf_mpls_label(act); 3485 entry->mpls_mangle.tc = tcf_mpls_tc(act); 3486 entry->mpls_mangle.bos = tcf_mpls_bos(act); 3487 entry->mpls_mangle.ttl = tcf_mpls_ttl(act); 3488 break; 3489 default: 3490 goto err_out; 3491 } 3492 } else if (is_tcf_skbedit_ptype(act)) { 3493 entry->id = FLOW_ACTION_PTYPE; 3494 
entry->ptype = tcf_skbedit_ptype(act); 3495 } else { 3496 err = -EOPNOTSUPP; 3497 goto err_out; 3498 } 3499 3500 if (!is_tcf_pedit(act)) 3501 j++; 3502 } 3503 3504 err_out: 3505 if (!rtnl_held) 3506 rtnl_unlock(); 3507 3508 if (err) 3509 tc_cleanup_flow_action(flow_action); 3510 3511 return err; 3512 } 3513 EXPORT_SYMBOL(tc_setup_flow_action); 3514 3515 unsigned int tcf_exts_num_actions(struct tcf_exts *exts) 3516 { 3517 unsigned int num_acts = 0; 3518 struct tc_action *act; 3519 int i; 3520 3521 tcf_exts_for_each_action(i, act, exts) { 3522 if (is_tcf_pedit(act)) 3523 num_acts += tcf_pedit_nkeys(act); 3524 else 3525 num_acts++; 3526 } 3527 return num_acts; 3528 } 3529 EXPORT_SYMBOL(tcf_exts_num_actions); 3530 3531 static __net_init int tcf_net_init(struct net *net) 3532 { 3533 struct tcf_net *tn = net_generic(net, tcf_net_id); 3534 3535 spin_lock_init(&tn->idr_lock); 3536 idr_init(&tn->idr); 3537 return 0; 3538 } 3539 3540 static void __net_exit tcf_net_exit(struct net *net) 3541 { 3542 struct tcf_net *tn = net_generic(net, tcf_net_id); 3543 3544 idr_destroy(&tn->idr); 3545 } 3546 3547 static struct pernet_operations tcf_net_ops = { 3548 .init = tcf_net_init, 3549 .exit = tcf_net_exit, 3550 .id = &tcf_net_id, 3551 .size = sizeof(struct tcf_net), 3552 }; 3553 3554 static struct flow_indr_block_ing_entry block_ing_entry = { 3555 .cb = tc_indr_block_get_and_ing_cmd, 3556 .list = LIST_HEAD_INIT(block_ing_entry.list), 3557 }; 3558 3559 static int __init tc_filter_init(void) 3560 { 3561 int err; 3562 3563 tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0); 3564 if (!tc_filter_wq) 3565 return -ENOMEM; 3566 3567 err = register_pernet_subsys(&tcf_net_ops); 3568 if (err) 3569 goto err_register_pernet_subsys; 3570 3571 flow_indr_add_block_ing_cb(&block_ing_entry); 3572 3573 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 3574 RTNL_FLAG_DOIT_UNLOCKED); 3575 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 3576 RTNL_FLAG_DOIT_UNLOCKED); 3577 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, 3578 tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED); 3579 rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0); 3580 rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0); 3581 rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain, 3582 tc_dump_chain, 0); 3583 3584 return 0; 3585 3586 err_register_pernet_subsys: 3587 destroy_workqueue(tc_filter_wq); 3588 return err; 3589 } 3590 3591 subsys_initcall(tc_filter_init); 3592