/*
 * net/sched/cls_flow.c		Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#endif

struct flow_head {
	struct list_head	filters;
};

struct flow_filter {
	struct list_head	list;
	struct tcf_exts		exts;
	struct tcf_ematch_tree	ematches;
	u32			handle;

	u32			nkeys;
	u32			keymask;
	u32			mode;
	u32			mask;
	u32			xor;
	u32			rshift;
	u32			addend;
	u32			divisor;
	u32			baseclass;
};

static u32 flow_hashrnd __read_mostly;
static int flow_hashrnd_initted __read_mostly;

static const struct tcf_ext_map flow_ext_map = {
	.action	= TCA_FLOW_ACT,
	.police	= TCA_FLOW_POLICE,
};

static inline u32 addr_fold(void *addr)
{
	unsigned long a = (unsigned long)addr;

	return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
}

static u32 flow_get_src(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP):
		return ntohl(ip_hdr(skb)->saddr);
	case __constant_htons(ETH_P_IPV6):
		return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
	default:
		return addr_fold(skb->sk);
	}
}

static u32 flow_get_dst(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP):
		return ntohl(ip_hdr(skb)->daddr);
	case __constant_htons(ETH_P_IPV6):
		return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
	default:
		return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
	}
}

static u32 flow_get_proto(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP):
		return ip_hdr(skb)->protocol;
	case __constant_htons(ETH_P_IPV6):
		return ipv6_hdr(skb)->nexthdr;
	default:
		return 0;
	}
}

/* ESP carries no real ports, but its 32-bit SPI occupies the position
 * where the ports would sit, so it is folded into the port keys too. */
static int has_ports(u8 protocol)
{
	switch (protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
	case IPPROTO_SCTP:
	case IPPROTO_DCCP:
	case IPPROTO_ESP:
		return 1;
	default:
		return 0;
	}
}

static u32 flow_get_proto_src(const struct sk_buff *skb)
{
	u32 res = 0;

	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP): {
		struct iphdr *iph = ip_hdr(skb);

		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
		    has_ports(iph->protocol))
			res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
		break;
	}
	case __constant_htons(ETH_P_IPV6): {
		struct ipv6hdr *iph = ipv6_hdr(skb);

		if (has_ports(iph->nexthdr))
			res = ntohs(*(__be16 *)&iph[1]);
		break;
	}
	default:
		res = addr_fold(skb->sk);
	}

	return res;
}

static u32 flow_get_proto_dst(const struct sk_buff *skb)
{
	u32 res = 0;

	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP): {
		struct iphdr *iph = ip_hdr(skb);

		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
		    has_ports(iph->protocol))
			res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
		break;
	}
	case __constant_htons(ETH_P_IPV6): {
		struct ipv6hdr *iph = ipv6_hdr(skb);

		if (has_ports(iph->nexthdr))
			res = ntohs(*(__be16 *)((void *)&iph[1] + 2));
		break;
	}
	default:
		res = addr_fold(skb->dst) ^ (__force u16)skb->protocol;
	}

	return res;
}
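
/*
 * Note on the port extraction above: for a non-fragmented IPv4 packet
 * the transport header starts at (void *)iph + iph->ihl * 4; the source
 * port is the first 16-bit field there and the destination port the
 * second, hence the additional "+ 2" in flow_get_proto_dst().  The same
 * fixed offsets are applied directly behind the IPv6 header, which
 * assumes no extension headers are present.
 */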

static u32 flow_get_iif(const struct sk_buff *skb)
{
	return skb->iif;
}

static u32 flow_get_priority(const struct sk_buff *skb)
{
	return skb->priority;
}

static u32 flow_get_mark(const struct sk_buff *skb)
{
	return skb->mark;
}

static u32 flow_get_nfct(const struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	return addr_fold(skb->nfct);
#else
	return 0;
#endif
}

#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
/* Evaluates to the given member of the packet's conntrack tuple; jumps
 * to the enclosing function's "fallback" label when no conntrack entry
 * is attached to the skb. */
#define CTTUPLE(skb, member)						\
({									\
	enum ip_conntrack_info ctinfo;					\
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);			\
	if (ct == NULL)							\
		goto fallback;						\
	ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member;			\
})
#else
#define CTTUPLE(skb, member)						\
({									\
	goto fallback;							\
	0;								\
})
#endif

static u32 flow_get_nfct_src(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP):
		return ntohl(CTTUPLE(skb, src.u3.ip));
	case __constant_htons(ETH_P_IPV6):
		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
	}
fallback:
	return flow_get_src(skb);
}

static u32 flow_get_nfct_dst(const struct sk_buff *skb)
{
	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP):
		return ntohl(CTTUPLE(skb, dst.u3.ip));
	case __constant_htons(ETH_P_IPV6):
		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
	}
fallback:
	return flow_get_dst(skb);
}

static u32 flow_get_nfct_proto_src(const struct sk_buff *skb)
{
	return ntohs(CTTUPLE(skb, src.u.all));
fallback:
	return flow_get_proto_src(skb);
}

static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb)
{
	return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
	return flow_get_proto_dst(skb);
}

static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ROUTE
	if (skb->dst)
		return skb->dst->tclassid;
#endif
	return 0;
}

static u32 flow_get_skuid(const struct sk_buff *skb)
{
	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
		return skb->sk->sk_socket->file->f_uid;
	return 0;
}

static u32 flow_get_skgid(const struct sk_buff *skb)
{
	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
		return skb->sk->sk_socket->file->f_gid;
	return 0;
}

static u32 flow_key_get(const struct sk_buff *skb, int key)
{
	switch (key) {
	case FLOW_KEY_SRC:
		return flow_get_src(skb);
	case FLOW_KEY_DST:
		return flow_get_dst(skb);
	case FLOW_KEY_PROTO:
		return flow_get_proto(skb);
	case FLOW_KEY_PROTO_SRC:
		return flow_get_proto_src(skb);
	case FLOW_KEY_PROTO_DST:
		return flow_get_proto_dst(skb);
	case FLOW_KEY_IIF:
		return flow_get_iif(skb);
	case FLOW_KEY_PRIORITY:
		return flow_get_priority(skb);
	case FLOW_KEY_MARK:
		return flow_get_mark(skb);
	case FLOW_KEY_NFCT:
		return flow_get_nfct(skb);
	case FLOW_KEY_NFCT_SRC:
		return flow_get_nfct_src(skb);
	case FLOW_KEY_NFCT_DST:
		return flow_get_nfct_dst(skb);
	case FLOW_KEY_NFCT_PROTO_SRC:
		return flow_get_nfct_proto_src(skb);
	case FLOW_KEY_NFCT_PROTO_DST:
		return flow_get_nfct_proto_dst(skb);
	case FLOW_KEY_RTCLASSID:
		return flow_get_rtclassid(skb);
	case FLOW_KEY_SKUID:
		return flow_get_skuid(skb);
	case FLOW_KEY_SKGID:
		return flow_get_skgid(skb);
	default:
		WARN_ON(1);
		return 0;
	}
}
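
/*
 * Per-filter classification: the configured keys are extracted and
 * either jhashed (hash mode) or, with a single key, mapped through
 * mask/xor/rshift/addend (map mode).  Illustrative example: key "dst"
 * with mask 0xff, rshift 0 and addend 1 yields
 * classid = (dst & 0xff) + 1, i.e. classes 1..256 selected by the low
 * byte of the destination address; a divisor, if set, folds the result
 * into [0, divisor) before it is combined with the baseclass.
 */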

static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
			 struct tcf_result *res)
{
	struct flow_head *head = tp->root;
	struct flow_filter *f;
	u32 keymask;
	u32 classid;
	unsigned int n, key;
	int r;

	list_for_each_entry(f, &head->filters, list) {
		u32 keys[f->nkeys];

		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
			continue;

		keymask = f->keymask;

		/* Extract the configured keys, lowest key bit first. */
		for (n = 0; n < f->nkeys; n++) {
			key = ffs(keymask) - 1;
			keymask &= ~(1 << key);
			keys[n] = flow_key_get(skb, key);
		}

		if (f->mode == FLOW_MODE_HASH)
			classid = jhash2(keys, f->nkeys, flow_hashrnd);
		else {
			classid = keys[0];
			classid = (classid & f->mask) ^ f->xor;
			classid = (classid >> f->rshift) + f->addend;
		}

		if (f->divisor)
			classid %= f->divisor;

		res->class = 0;
		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);

		r = tcf_exts_exec(skb, &f->exts, res);
		if (r < 0)	/* action asked us to continue matching */
			continue;
		return r;
	}
	return -1;
}

static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
	[TCA_FLOW_BASECLASS]	= { .type = NLA_U32 },
	[TCA_FLOW_RSHIFT]	= { .type = NLA_U32 },
	[TCA_FLOW_ADDEND]	= { .type = NLA_U32 },
	[TCA_FLOW_MASK]		= { .type = NLA_U32 },
	[TCA_FLOW_XOR]		= { .type = NLA_U32 },
	[TCA_FLOW_DIVISOR]	= { .type = NLA_U32 },
	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
};
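
/*
 * These attributes are filled in by userspace.  An illustrative tc(8)
 * invocation (iproute2 flow syntax; treat this as a sketch rather than
 * a reference):
 *
 *	tc filter add dev eth0 parent 1: protocol ip prio 1 \
 *		flow hash keys src,dst,proto,proto-src,proto-dst \
 *		divisor 1024 baseclass 1:1
 *
 * which hashes the 5-tuple into 1024 classes starting at 1:1.
 */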

static int flow_change(struct tcf_proto *tp, unsigned long base,
		       u32 handle, struct nlattr **tca,
		       unsigned long *arg)
{
	struct flow_head *head = tp->root;
	struct flow_filter *f;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_FLOW_MAX + 1];
	struct tcf_exts e;
	struct tcf_ematch_tree t;
	unsigned int nkeys = 0;
	u32 baseclass = 0;
	u32 keymask = 0;
	u32 mode;
	int err;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
	if (err < 0)
		return err;

	if (tb[TCA_FLOW_BASECLASS]) {
		baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
		if (TC_H_MIN(baseclass) == 0)
			return -EINVAL;
	}

	if (tb[TCA_FLOW_KEYS]) {
		keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);
		if (fls(keymask) - 1 > FLOW_KEY_MAX)
			return -EOPNOTSUPP;

		nkeys = hweight32(keymask);
		if (nkeys == 0)
			return -EINVAL;
	}

	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
	if (err < 0)
		return err;

	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
	if (err < 0)
		goto err1;

	f = (struct flow_filter *)*arg;
	if (f != NULL) {
		err = -EINVAL;
		if (f->handle != handle && handle)
			goto err2;

		mode = f->mode;
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		/* Only hash mode can combine more than one key. */
		if (mode != FLOW_MODE_HASH && nkeys > 1)
			goto err2;
	} else {
		err = -EINVAL;
		if (!handle)
			goto err2;
		if (!tb[TCA_FLOW_KEYS])
			goto err2;

		mode = FLOW_MODE_MAP;
		if (tb[TCA_FLOW_MODE])
			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
		if (mode != FLOW_MODE_HASH && nkeys > 1)
			goto err2;

		if (TC_H_MAJ(baseclass) == 0)
			baseclass = TC_H_MAKE(tp->q->handle, baseclass);
		if (TC_H_MIN(baseclass) == 0)
			baseclass = TC_H_MAKE(baseclass, 1);

		err = -ENOBUFS;
		f = kzalloc(sizeof(*f), GFP_KERNEL);
		if (f == NULL)
			goto err2;

		f->handle = handle;
		f->mask = ~0U;
	}

	tcf_exts_change(tp, &f->exts, &e);
	tcf_em_tree_change(tp, &f->ematches, &t);

	tcf_tree_lock(tp);

	if (tb[TCA_FLOW_KEYS]) {
		f->keymask = keymask;
		f->nkeys = nkeys;
	}

	f->mode = mode;

	if (tb[TCA_FLOW_MASK])
		f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
	if (tb[TCA_FLOW_XOR])
		f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
	if (tb[TCA_FLOW_RSHIFT])
		f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
	if (tb[TCA_FLOW_ADDEND])
		f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);

	if (tb[TCA_FLOW_DIVISOR])
		f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
	if (baseclass)
		f->baseclass = baseclass;

	if (*arg == 0)
		list_add_tail(&f->list, &head->filters);

	tcf_tree_unlock(tp);

	*arg = (unsigned long)f;
	return 0;

err2:
	tcf_em_tree_destroy(tp, &t);
err1:
	tcf_exts_destroy(tp, &e);
	return err;
}

static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
{
	tcf_exts_destroy(tp, &f->exts);
	tcf_em_tree_destroy(tp, &f->ematches);
	kfree(f);
}

static int flow_delete(struct tcf_proto *tp, unsigned long arg)
{
	struct flow_filter *f = (struct flow_filter *)arg;

	tcf_tree_lock(tp);
	list_del(&f->list);
	tcf_tree_unlock(tp);
	flow_destroy_filter(tp, f);
	return 0;
}

static int flow_init(struct tcf_proto *tp)
{
	struct flow_head *head;

	/* Seed the hash once, on first use of the classifier. */
	if (!flow_hashrnd_initted) {
		get_random_bytes(&flow_hashrnd, 4);
		flow_hashrnd_initted = 1;
	}

	head = kzalloc(sizeof(*head), GFP_KERNEL);
	if (head == NULL)
		return -ENOBUFS;
	INIT_LIST_HEAD(&head->filters);
	tp->root = head;
	return 0;
}

static void flow_destroy(struct tcf_proto *tp)
{
	struct flow_head *head = tp->root;
	struct flow_filter *f, *next;

	list_for_each_entry_safe(f, next, &head->filters, list) {
		list_del(&f->list);
		flow_destroy_filter(tp, f);
	}
	kfree(head);
}

static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
{
	struct flow_head *head = tp->root;
	struct flow_filter *f;

	list_for_each_entry(f, &head->filters, list)
		if (f->handle == handle)
			return (unsigned long)f;
	return 0;
}

static void flow_put(struct tcf_proto *tp, unsigned long f)
{
	return;
}
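
/*
 * Dump the filter configuration.  Keys and mode are always emitted;
 * the remaining attributes are emitted only when they differ from
 * their defaults, so a dump round-trips through flow_change().
 */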

static int flow_dump(struct tcf_proto *tp, unsigned long fh,
		     struct sk_buff *skb, struct tcmsg *t)
{
	struct flow_filter *f = (struct flow_filter *)fh;
	struct nlattr *nest;

	if (f == NULL)
		return skb->len;

	t->tcm_handle = f->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask);
	NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode);

	if (f->mask != ~0 || f->xor != 0) {
		NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask);
		NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor);
	}
	if (f->rshift)
		NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift);
	if (f->addend)
		NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend);

	if (f->divisor)
		NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor);
	if (f->baseclass)
		NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);

	if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
		goto nla_put_failure;

	if (f->ematches.hdr.nmatches &&
	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
		goto nla_put_failure;

	nla_nest_end(skb, nest);

	if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, nest);
	return -1;
}

static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
	struct flow_head *head = tp->root;
	struct flow_filter *f;

	list_for_each_entry(f, &head->filters, list) {
		if (arg->count < arg->skip)
			goto skip;
		if (arg->fn(tp, (unsigned long)f, arg) < 0) {
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}

static struct tcf_proto_ops cls_flow_ops __read_mostly = {
	.kind		= "flow",
	.classify	= flow_classify,
	.init		= flow_init,
	.destroy	= flow_destroy,
	.change		= flow_change,
	.delete		= flow_delete,
	.get		= flow_get,
	.put		= flow_put,
	.dump		= flow_dump,
	.walk		= flow_walk,
	.owner		= THIS_MODULE,
};

static int __init cls_flow_init(void)
{
	return register_tcf_proto_ops(&cls_flow_ops);
}

static void __exit cls_flow_exit(void)
{
	unregister_tcf_proto_ops(&cls_flow_ops);
}

module_init(cls_flow_init);
module_exit(cls_flow_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("TC flow classifier");