/*
 * net/sched/cls_flow.c		Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#endif

struct flow_head {
	struct list_head	filters;
};

struct flow_filter {
	struct list_head	list;
	struct tcf_exts		exts;
	struct tcf_ematch_tree	ematches;
	struct timer_list	perturb_timer;
	u32			perturb_period;
	u32			handle;

	u32			nkeys;
	u32			keymask;
	u32			mode;
	u32			mask;
	u32			xor;
	u32			rshift;
	u32			addend;
	u32			divisor;
	u32			baseclass;
	u32			hashrnd;
};

static const struct tcf_ext_map flow_ext_map = {
	.action	= TCA_FLOW_ACT,
	.police	= TCA_FLOW_POLICE,
};

/* Fold a kernel pointer into 32 bits for use as a hash/key input. */
static inline u32 addr_fold(void *addr)
{
	unsigned long a = (unsigned long)addr;

	return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
}

static u32 flow_get_src(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return ntohl(ip_hdr(skb)->saddr);
	case htons(ETH_P_IPV6):
		return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
	default:
		return addr_fold(skb->sk);
	}
}

static u32 flow_get_dst(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return ntohl(ip_hdr(skb)->daddr);
	case htons(ETH_P_IPV6):
		return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
	default:
		return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
	}
}

static u32 flow_get_proto(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return ip_hdr(skb)->protocol;
	case htons(ETH_P_IPV6):
		return ipv6_hdr(skb)->nexthdr;
	default:
		return 0;
	}
}

static int has_ports(u8 protocol)
{
	switch (protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
	case IPPROTO_SCTP:
	case IPPROTO_DCCP:
	case IPPROTO_ESP:
		return 1;
	default:
		return 0;
	}
}
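/*
 * Transport "port" getters. For every protocol accepted by has_ports()
 * the first four bytes of the transport header hold the source and
 * destination ports (for ESP, the SPI plays that role). IPv4 packets
 * with MF set or a nonzero fragment offset are skipped entirely, so
 * that all fragments of a datagram classify the same way; for IPv6 the
 * transport header is assumed to follow the fixed header directly,
 * i.e. extension headers are not walked.
 */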
static u32 flow_get_proto_src(const struct sk_buff *skb)
{
	u32 res = 0;

	switch (skb->protocol) {
	case htons(ETH_P_IP): {
		struct iphdr *iph = ip_hdr(skb);

		if (!(iph->frag_off & htons(IP_MF|IP_OFFSET)) &&
		    has_ports(iph->protocol))
			res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
		break;
	}
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *iph = ipv6_hdr(skb);

		if (has_ports(iph->nexthdr))
			res = ntohs(*(__be16 *)&iph[1]);
		break;
	}
	default:
		res = addr_fold(skb->sk);
	}

	return res;
}

static u32 flow_get_proto_dst(const struct sk_buff *skb)
{
	u32 res = 0;

	switch (skb->protocol) {
	case htons(ETH_P_IP): {
		struct iphdr *iph = ip_hdr(skb);

		if (!(iph->frag_off & htons(IP_MF|IP_OFFSET)) &&
		    has_ports(iph->protocol))
			res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
		break;
	}
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *iph = ipv6_hdr(skb);

		if (has_ports(iph->nexthdr))
			res = ntohs(*(__be16 *)((void *)&iph[1] + 2));
		break;
	}
	default:
		res = addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
	}

	return res;
}

static u32 flow_get_iif(const struct sk_buff *skb)
{
	return skb->skb_iif;
}

static u32 flow_get_priority(const struct sk_buff *skb)
{
	return skb->priority;
}

static u32 flow_get_mark(const struct sk_buff *skb)
{
	return skb->mark;
}

static u32 flow_get_nfct(const struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	return addr_fold(skb->nfct);
#else
	return 0;
#endif
}

/*
 * Pull a member out of the conntrack tuple attached to this skb,
 * jumping to the caller's "fallback" label when no conntrack entry
 * exists (or conntrack is compiled out).
 */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#define CTTUPLE(skb, member)						\
({									\
	enum ip_conntrack_info ctinfo;					\
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);			\
	if (ct == NULL)							\
		goto fallback;						\
	ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member;			\
})
#else
#define CTTUPLE(skb, member)						\
({									\
	goto fallback;							\
	0;								\
})
#endif

static u32 flow_get_nfct_src(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return ntohl(CTTUPLE(skb, src.u3.ip));
	case htons(ETH_P_IPV6):
		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
	}
fallback:
	return flow_get_src(skb);
}

static u32 flow_get_nfct_dst(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return ntohl(CTTUPLE(skb, dst.u3.ip));
	case htons(ETH_P_IPV6):
		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
	}
fallback:
	return flow_get_dst(skb);
}

static u32 flow_get_nfct_proto_src(const struct sk_buff *skb)
{
	return ntohs(CTTUPLE(skb, src.u.all));
fallback:
	return flow_get_proto_src(skb);
}

static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb)
{
	return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
	return flow_get_proto_dst(skb);
}

static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ROUTE
	if (skb_dst(skb))
		return skb_dst(skb)->tclassid;
#endif
	return 0;
}

static u32 flow_get_skuid(const struct sk_buff *skb)
{
	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
		return skb->sk->sk_socket->file->f_cred->fsuid;
	return 0;
}

static u32 flow_get_skgid(const struct sk_buff *skb)
{
	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
		return skb->sk->sk_socket->file->f_cred->fsgid;
	return 0;
}

static u32 flow_get_vlan_tag(const struct sk_buff *skb)
{
	u16 uninitialized_var(tag);

	if (vlan_get_tag(skb, &tag) < 0)
		return 0;
	return tag & VLAN_VID_MASK;
}
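/*
 * Dispatch a single FLOW_KEY_* identifier to its getter above.
 * flow_classify() calls this once per bit set in the filter's keymask.
 */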
static u32 flow_key_get(const struct sk_buff *skb, int key)
{
	switch (key) {
	case FLOW_KEY_SRC:
		return flow_get_src(skb);
	case FLOW_KEY_DST:
		return flow_get_dst(skb);
	case FLOW_KEY_PROTO:
		return flow_get_proto(skb);
	case FLOW_KEY_PROTO_SRC:
		return flow_get_proto_src(skb);
	case FLOW_KEY_PROTO_DST:
		return flow_get_proto_dst(skb);
	case FLOW_KEY_IIF:
		return flow_get_iif(skb);
	case FLOW_KEY_PRIORITY:
		return flow_get_priority(skb);
	case FLOW_KEY_MARK:
		return flow_get_mark(skb);
	case FLOW_KEY_NFCT:
		return flow_get_nfct(skb);
	case FLOW_KEY_NFCT_SRC:
		return flow_get_nfct_src(skb);
	case FLOW_KEY_NFCT_DST:
		return flow_get_nfct_dst(skb);
	case FLOW_KEY_NFCT_PROTO_SRC:
		return flow_get_nfct_proto_src(skb);
	case FLOW_KEY_NFCT_PROTO_DST:
		return flow_get_nfct_proto_dst(skb);
	case FLOW_KEY_RTCLASSID:
		return flow_get_rtclassid(skb);
	case FLOW_KEY_SKUID:
		return flow_get_skuid(skb);
	case FLOW_KEY_SKGID:
		return flow_get_skgid(skb);
	case FLOW_KEY_VLAN_TAG:
		return flow_get_vlan_tag(skb);
	default:
		WARN_ON(1);
		return 0;
	}
}

/*
 * Compute a class id from the configured keys. In FLOW_MODE_HASH the
 * keys are jhashed with a (periodically perturbed) random seed; in
 * FLOW_MODE_MAP the single key is transformed as
 * (((key & mask) ^ xor) >> rshift) + addend. Either result is then
 * reduced modulo the divisor, if one is set, and offset by baseclass.
 */
static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
			 struct tcf_result *res)
{
	struct flow_head *head = tp->root;
	struct flow_filter *f;
	u32 keymask;
	u32 classid;
	unsigned int n, key;
	int r;

	list_for_each_entry(f, &head->filters, list) {
		u32 keys[f->nkeys];

		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
			continue;

		keymask = f->keymask;

		for (n = 0; n < f->nkeys; n++) {
			key = ffs(keymask) - 1;
			keymask &= ~(1 << key);
			keys[n] = flow_key_get(skb, key);
		}

		if (f->mode == FLOW_MODE_HASH)
			classid = jhash2(keys, f->nkeys, f->hashrnd);
		else {
			classid = keys[0];
			classid = (classid & f->mask) ^ f->xor;
			classid = (classid >> f->rshift) + f->addend;
		}

		if (f->divisor)
			classid %= f->divisor;

		res->class = 0;
		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);

		r = tcf_exts_exec(skb, &f->exts, res);
		if (r < 0)
			continue;
		return r;
	}
	return -1;
}

static void flow_perturbation(unsigned long arg)
{
	struct flow_filter *f = (struct flow_filter *)arg;

	get_random_bytes(&f->hashrnd, 4);
	if (f->perturb_period)
		mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
}

static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
	[TCA_FLOW_BASECLASS]	= { .type = NLA_U32 },
	[TCA_FLOW_RSHIFT]	= { .type = NLA_U32 },
	[TCA_FLOW_ADDEND]	= { .type = NLA_U32 },
	[TCA_FLOW_MASK]		= { .type = NLA_U32 },
	[TCA_FLOW_XOR]		= { .type = NLA_U32 },
	[TCA_FLOW_DIVISOR]	= { .type = NLA_U32 },
	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
};
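/*
 * Create a new filter or reconfigure an existing one (*arg != 0).
 * FLOW_MODE_HASH accepts several keys and an optional perturbation
 * period; FLOW_MODE_MAP is limited to a single key and no perturbation.
 */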
static int flow_change(struct tcf_proto *tp, unsigned long base,
		       u32 handle, struct nlattr **tca,
		       unsigned long *arg)
{
	struct flow_head *head = tp->root;
	struct flow_filter *f;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_FLOW_MAX + 1];
	struct tcf_exts e;
	struct tcf_ematch_tree t;
	unsigned int nkeys = 0;
	unsigned int perturb_period = 0;
	u32 baseclass = 0;
	u32 keymask = 0;
	u32 mode;
	int err;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
	if (err < 0)
		return err;

	if (tb[TCA_FLOW_BASECLASS]) {
		baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
		if (TC_H_MIN(baseclass) == 0)
			return -EINVAL;
	}

	if (tb[TCA_FLOW_KEYS]) {
		keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);

		nkeys = hweight32(keymask);
		if (nkeys == 0)
			return -EINVAL;

		if (fls(keymask) - 1 > FLOW_KEY_MAX)
			return -EOPNOTSUPP;
	}

	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
	if (err < 0)
		return err;

	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
	if (err < 0)
		goto err1;

	f = (struct flow_filter *)*arg;
	if (f != NULL) {
		err = -EINVAL;
		if (f->handle != handle && handle)
			goto err2;

		mode = f->mode;
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
			goto err2;

		if (mode == FLOW_MODE_HASH)
			perturb_period = f->perturb_period;
		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
				goto err2;
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}
	} else {
		err = -EINVAL;
		if (!handle)
			goto err2;
		if (!tb[TCA_FLOW_KEYS])
			goto err2;

		mode = FLOW_MODE_MAP;
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
			goto err2;

		if (tb[TCA_FLOW_PERTURB]) {
			if (mode != FLOW_MODE_HASH)
				goto err2;
			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
		}

		if (TC_H_MAJ(baseclass) == 0)
			baseclass = TC_H_MAKE(tp->q->handle, baseclass);
		if (TC_H_MIN(baseclass) == 0)
			baseclass = TC_H_MAKE(baseclass, 1);

		err = -ENOBUFS;
		f = kzalloc(sizeof(*f), GFP_KERNEL);
		if (f == NULL)
			goto err2;

		f->handle = handle;
		f->mask	  = ~0U;

		get_random_bytes(&f->hashrnd, 4);
		f->perturb_timer.function = flow_perturbation;
		f->perturb_timer.data = (unsigned long)f;
		init_timer_deferrable(&f->perturb_timer);
	}

	tcf_exts_change(tp, &f->exts, &e);
	tcf_em_tree_change(tp, &f->ematches, &t);

	tcf_tree_lock(tp);

	if (tb[TCA_FLOW_KEYS]) {
		f->keymask = keymask;
		f->nkeys   = nkeys;
	}

	f->mode = mode;

	if (tb[TCA_FLOW_MASK])
		f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
	if (tb[TCA_FLOW_XOR])
		f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
	if (tb[TCA_FLOW_RSHIFT])
		f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
	if (tb[TCA_FLOW_ADDEND])
		f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);

	if (tb[TCA_FLOW_DIVISOR])
		f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
	if (baseclass)
		f->baseclass = baseclass;

	f->perturb_period = perturb_period;
	del_timer(&f->perturb_timer);
	if (perturb_period)
		mod_timer(&f->perturb_timer, jiffies + perturb_period);

	if (*arg == 0)
		list_add_tail(&f->list, &head->filters);

	tcf_tree_unlock(tp);

	*arg = (unsigned long)f;
	return 0;

err2:
	tcf_em_tree_destroy(tp, &t);
err1:
	tcf_exts_destroy(tp, &e);
	return err;
}

static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
{
	del_timer_sync(&f->perturb_timer);
	tcf_exts_destroy(tp, &f->exts);
	tcf_em_tree_destroy(tp, &f->ematches);
	kfree(f);
}

static int flow_delete(struct tcf_proto *tp, unsigned long arg)
{
	struct flow_filter *f = (struct flow_filter *)arg;

	tcf_tree_lock(tp);
	list_del(&f->list);
	tcf_tree_unlock(tp);
	flow_destroy_filter(tp, f);
	return 0;
}

static int flow_init(struct tcf_proto *tp)
{
	struct flow_head *head;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (head == NULL)
		return -ENOBUFS;
	INIT_LIST_HEAD(&head->filters);
	tp->root = head;
	return 0;
}
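/*
 * Tear down the whole classifier instance: every remaining filter is
 * unlinked and freed, then the list head itself.
 */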
static void flow_destroy(struct tcf_proto *tp)
{
	struct flow_head *head = tp->root;
	struct flow_filter *f, *next;

	list_for_each_entry_safe(f, next, &head->filters, list) {
		list_del(&f->list);
		flow_destroy_filter(tp, f);
	}
	kfree(head);
}

static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
{
	struct flow_head *head = tp->root;
	struct flow_filter *f;

	list_for_each_entry(f, &head->filters, list)
		if (f->handle == handle)
			return (unsigned long)f;
	return 0;
}

static void flow_put(struct tcf_proto *tp, unsigned long f)
{
	return;
}

static int flow_dump(struct tcf_proto *tp, unsigned long fh,
		     struct sk_buff *skb, struct tcmsg *t)
{
	struct flow_filter *f = (struct flow_filter *)fh;
	struct nlattr *nest;

	if (f == NULL)
		return skb->len;

	t->tcm_handle = f->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask);
	NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode);

	if (f->mask != ~0 || f->xor != 0) {
		NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask);
		NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor);
	}
	if (f->rshift)
		NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift);
	if (f->addend)
		NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend);

	if (f->divisor)
		NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor);
	if (f->baseclass)
		NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);

	if (f->perturb_period)
		NLA_PUT_U32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ);

	if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
		goto nla_put_failure;
#ifdef CONFIG_NET_EMATCH
	if (f->ematches.hdr.nmatches &&
	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
		goto nla_put_failure;
#endif
	nla_nest_end(skb, nest);

	if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, nest);
	return -1;
}

static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct flow_head *head = tp->root;
	struct flow_filter *f;

	list_for_each_entry(f, &head->filters, list) {
		if (arg->count < arg->skip)
			goto skip;
		if (arg->fn(tp, (unsigned long)f, arg) < 0) {
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}

static struct tcf_proto_ops cls_flow_ops __read_mostly = {
	.kind		= "flow",
	.classify	= flow_classify,
	.init		= flow_init,
	.destroy	= flow_destroy,
	.change		= flow_change,
	.delete		= flow_delete,
	.get		= flow_get,
	.put		= flow_put,
	.dump		= flow_dump,
	.walk		= flow_walk,
	.owner		= THIS_MODULE,
};

static int __init cls_flow_init(void)
{
	return register_tcf_proto_ops(&cls_flow_ops);
}

static void __exit cls_flow_exit(void)
{
	unregister_tcf_proto_ops(&cls_flow_ops);
}

module_init(cls_flow_init);
module_exit(cls_flow_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("TC flow classifier");
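/*
 * Illustrative usage (not part of this file; see tc-flow(8) for the
 * authoritative syntax). Hash mode spreads flows over a divisor-sized
 * class space, rehashing every "perturb" seconds:
 *
 *	tc filter add dev eth0 parent 1: protocol ip prio 1 handle 1 \
 *		flow hash keys src,dst,proto,proto-src,proto-dst \
 *		divisor 1024 perturb 600
 *
 * Map mode derives the class deterministically from a single key, e.g.
 * selecting a class from the low byte of the destination address:
 *
 *	tc filter add dev eth0 parent 1: protocol ip prio 1 handle 2 \
 *		flow map key dst and 0xff baseclass 1:1
 */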