xref: /linux/net/sched/cls_flow.c (revision 643d1f7fe3aa12c8bdea6fa5b4ba874ff6dd601d)
1 /*
2  * net/sched/cls_flow.c		Generic flow classifier
3  *
4  * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/list.h>
15 #include <linux/jhash.h>
16 #include <linux/random.h>
17 #include <linux/pkt_cls.h>
18 #include <linux/skbuff.h>
19 #include <linux/in.h>
20 #include <linux/ip.h>
21 #include <linux/ipv6.h>
22 
23 #include <net/pkt_cls.h>
24 #include <net/ip.h>
25 #include <net/route.h>
26 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
27 #include <net/netfilter/nf_conntrack.h>
28 #endif
29 
30 struct flow_head {
31 	struct list_head	filters;
32 };
33 
34 struct flow_filter {
35 	struct list_head	list;
36 	struct tcf_exts		exts;
37 	struct tcf_ematch_tree	ematches;
38 	u32			handle;
39 
40 	u32			nkeys;
41 	u32			keymask;
42 	u32			mode;
43 	u32			mask;
44 	u32			xor;
45 	u32			rshift;
46 	u32			addend;
47 	u32			divisor;
48 	u32			baseclass;
49 };
50 
51 static u32 flow_hashrnd __read_mostly;
52 static int flow_hashrnd_initted __read_mostly;
53 
54 static const struct tcf_ext_map flow_ext_map = {
55 	.action	= TCA_FLOW_ACT,
56 	.police	= TCA_FLOW_POLICE,
57 };
58 
59 static inline u32 addr_fold(void *addr)
60 {
61 	unsigned long a = (unsigned long)addr;
62 
63 	return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
64 }
65 
66 static u32 flow_get_src(const struct sk_buff *skb)
67 {
68 	switch (skb->protocol) {
69 	case __constant_htons(ETH_P_IP):
70 		return ntohl(ip_hdr(skb)->saddr);
71 	case __constant_htons(ETH_P_IPV6):
72 		return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
73 	default:
74 		return addr_fold(skb->sk);
75 	}
76 }
77 
78 static u32 flow_get_dst(const struct sk_buff *skb)
79 {
80 	switch (skb->protocol) {
81 	case __constant_htons(ETH_P_IP):
82 		return ntohl(ip_hdr(skb)->daddr);
83 	case __constant_htons(ETH_P_IPV6):
84 		return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
85 	default:
86 		return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
87 	}
88 }
89 
90 static u32 flow_get_proto(const struct sk_buff *skb)
91 {
92 	switch (skb->protocol) {
93 	case __constant_htons(ETH_P_IP):
94 		return ip_hdr(skb)->protocol;
95 	case __constant_htons(ETH_P_IPV6):
96 		return ipv6_hdr(skb)->nexthdr;
97 	default:
98 		return 0;
99 	}
100 }
101 
102 static int has_ports(u8 protocol)
103 {
104 	switch (protocol) {
105 	case IPPROTO_TCP:
106 	case IPPROTO_UDP:
107 	case IPPROTO_UDPLITE:
108 	case IPPROTO_SCTP:
109 	case IPPROTO_DCCP:
110 	case IPPROTO_ESP:
111 		return 1;
112 	default:
113 		return 0;
114 	}
115 }
116 
117 static u32 flow_get_proto_src(const struct sk_buff *skb)
118 {
119 	u32 res = 0;
120 
121 	switch (skb->protocol) {
122 	case __constant_htons(ETH_P_IP): {
123 		struct iphdr *iph = ip_hdr(skb);
124 
125 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
126 		    has_ports(iph->protocol))
127 			res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
128 		break;
129 	}
130 	case __constant_htons(ETH_P_IPV6): {
131 		struct ipv6hdr *iph = ipv6_hdr(skb);
132 
133 		if (has_ports(iph->nexthdr))
134 			res = ntohs(*(__be16 *)&iph[1]);
135 		break;
136 	}
137 	default:
138 		res = addr_fold(skb->sk);
139 	}
140 
141 	return res;
142 }
143 
144 static u32 flow_get_proto_dst(const struct sk_buff *skb)
145 {
146 	u32 res = 0;
147 
148 	switch (skb->protocol) {
149 	case __constant_htons(ETH_P_IP): {
150 		struct iphdr *iph = ip_hdr(skb);
151 
152 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
153 		    has_ports(iph->protocol))
154 			res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
155 		break;
156 	}
157 	case __constant_htons(ETH_P_IPV6): {
158 		struct ipv6hdr *iph = ipv6_hdr(skb);
159 
160 		if (has_ports(iph->nexthdr))
161 			res = ntohs(*(__be16 *)((void *)&iph[1] + 2));
162 		break;
163 	}
164 	default:
165 		res = addr_fold(skb->dst) ^ (__force u16)skb->protocol;
166 	}
167 
168 	return res;
169 }
170 
171 static u32 flow_get_iif(const struct sk_buff *skb)
172 {
173 	return skb->iif;
174 }
175 
176 static u32 flow_get_priority(const struct sk_buff *skb)
177 {
178 	return skb->priority;
179 }
180 
181 static u32 flow_get_mark(const struct sk_buff *skb)
182 {
183 	return skb->mark;
184 }
185 
186 static u32 flow_get_nfct(const struct sk_buff *skb)
187 {
188 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
189 	return addr_fold(skb->nfct);
190 #else
191 	return 0;
192 #endif
193 }
194 
/*
 * CTTUPLE(skb, member) - fetch a member of the conntrack tuple for the
 * direction the packet travels in.
 *
 * The macro jumps to a label named "fallback", which the calling function
 * must provide, when the skb has no conntrack entry.  Without conntrack
 * support it always jumps there.
 */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#define CTTUPLE(skb, member)						\
({									\
	enum ip_conntrack_info ct_info;					\
	struct nf_conn *conn = nf_ct_get(skb, &ct_info);		\
	if (!conn)							\
		goto fallback;						\
	conn->tuplehash[CTINFO2DIR(ct_info)].tuple.member;		\
})
#else
#define CTTUPLE(skb, member)						\
({									\
	goto fallback;							\
	0;								\
})
#endif
211 
212 static u32 flow_get_nfct_src(const struct sk_buff *skb)
213 {
214 	switch (skb->protocol) {
215 	case __constant_htons(ETH_P_IP):
216 		return ntohl(CTTUPLE(skb, src.u3.ip));
217 	case __constant_htons(ETH_P_IPV6):
218 		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
219 	}
220 fallback:
221 	return flow_get_src(skb);
222 }
223 
224 static u32 flow_get_nfct_dst(const struct sk_buff *skb)
225 {
226 	switch (skb->protocol) {
227 	case __constant_htons(ETH_P_IP):
228 		return ntohl(CTTUPLE(skb, dst.u3.ip));
229 	case __constant_htons(ETH_P_IPV6):
230 		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
231 	}
232 fallback:
233 	return flow_get_dst(skb);
234 }
235 
236 static u32 flow_get_nfct_proto_src(const struct sk_buff *skb)
237 {
238 	return ntohs(CTTUPLE(skb, src.u.all));
239 fallback:
240 	return flow_get_proto_src(skb);
241 }
242 
243 static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb)
244 {
245 	return ntohs(CTTUPLE(skb, dst.u.all));
246 fallback:
247 	return flow_get_proto_dst(skb);
248 }
249 
250 static u32 flow_get_rtclassid(const struct sk_buff *skb)
251 {
252 #ifdef CONFIG_NET_CLS_ROUTE
253 	if (skb->dst)
254 		return skb->dst->tclassid;
255 #endif
256 	return 0;
257 }
258 
259 static u32 flow_get_skuid(const struct sk_buff *skb)
260 {
261 	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
262 		return skb->sk->sk_socket->file->f_uid;
263 	return 0;
264 }
265 
266 static u32 flow_get_skgid(const struct sk_buff *skb)
267 {
268 	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
269 		return skb->sk->sk_socket->file->f_gid;
270 	return 0;
271 }
272 
273 static u32 flow_key_get(const struct sk_buff *skb, int key)
274 {
275 	switch (key) {
276 	case FLOW_KEY_SRC:
277 		return flow_get_src(skb);
278 	case FLOW_KEY_DST:
279 		return flow_get_dst(skb);
280 	case FLOW_KEY_PROTO:
281 		return flow_get_proto(skb);
282 	case FLOW_KEY_PROTO_SRC:
283 		return flow_get_proto_src(skb);
284 	case FLOW_KEY_PROTO_DST:
285 		return flow_get_proto_dst(skb);
286 	case FLOW_KEY_IIF:
287 		return flow_get_iif(skb);
288 	case FLOW_KEY_PRIORITY:
289 		return flow_get_priority(skb);
290 	case FLOW_KEY_MARK:
291 		return flow_get_mark(skb);
292 	case FLOW_KEY_NFCT:
293 		return flow_get_nfct(skb);
294 	case FLOW_KEY_NFCT_SRC:
295 		return flow_get_nfct_src(skb);
296 	case FLOW_KEY_NFCT_DST:
297 		return flow_get_nfct_dst(skb);
298 	case FLOW_KEY_NFCT_PROTO_SRC:
299 		return flow_get_nfct_proto_src(skb);
300 	case FLOW_KEY_NFCT_PROTO_DST:
301 		return flow_get_nfct_proto_dst(skb);
302 	case FLOW_KEY_RTCLASSID:
303 		return flow_get_rtclassid(skb);
304 	case FLOW_KEY_SKUID:
305 		return flow_get_skuid(skb);
306 	case FLOW_KEY_SKGID:
307 		return flow_get_skgid(skb);
308 	default:
309 		WARN_ON(1);
310 		return 0;
311 	}
312 }
313 
314 static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
315 			 struct tcf_result *res)
316 {
317 	struct flow_head *head = tp->root;
318 	struct flow_filter *f;
319 	u32 keymask;
320 	u32 classid;
321 	unsigned int n, key;
322 	int r;
323 
324 	list_for_each_entry(f, &head->filters, list) {
325 		u32 keys[f->nkeys];
326 
327 		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
328 			continue;
329 
330 		keymask = f->keymask;
331 
332 		for (n = 0; n < f->nkeys; n++) {
333 			key = ffs(keymask) - 1;
334 			keymask &= ~(1 << key);
335 			keys[n] = flow_key_get(skb, key);
336 		}
337 
338 		if (f->mode == FLOW_MODE_HASH)
339 			classid = jhash2(keys, f->nkeys, flow_hashrnd);
340 		else {
341 			classid = keys[0];
342 			classid = (classid & f->mask) ^ f->xor;
343 			classid = (classid >> f->rshift) + f->addend;
344 		}
345 
346 		if (f->divisor)
347 			classid %= f->divisor;
348 
349 		res->class   = 0;
350 		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);
351 
352 		r = tcf_exts_exec(skb, &f->exts, res);
353 		if (r < 0)
354 			continue;
355 		return r;
356 	}
357 	return -1;
358 }
359 
360 static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
361 	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
362 	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
363 	[TCA_FLOW_BASECLASS]	= { .type = NLA_U32 },
364 	[TCA_FLOW_RSHIFT]	= { .type = NLA_U32 },
365 	[TCA_FLOW_ADDEND]	= { .type = NLA_U32 },
366 	[TCA_FLOW_MASK]		= { .type = NLA_U32 },
367 	[TCA_FLOW_XOR]		= { .type = NLA_U32 },
368 	[TCA_FLOW_DIVISOR]	= { .type = NLA_U32 },
369 	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
370 	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
371 	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
372 };
373 
374 static int flow_change(struct tcf_proto *tp, unsigned long base,
375 		       u32 handle, struct nlattr **tca,
376 		       unsigned long *arg)
377 {
378 	struct flow_head *head = tp->root;
379 	struct flow_filter *f;
380 	struct nlattr *opt = tca[TCA_OPTIONS];
381 	struct nlattr *tb[TCA_FLOW_MAX + 1];
382 	struct tcf_exts e;
383 	struct tcf_ematch_tree t;
384 	unsigned int nkeys = 0;
385 	u32 baseclass = 0;
386 	u32 keymask = 0;
387 	u32 mode;
388 	int err;
389 
390 	if (opt == NULL)
391 		return -EINVAL;
392 
393 	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
394 	if (err < 0)
395 		return err;
396 
397 	if (tb[TCA_FLOW_BASECLASS]) {
398 		baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
399 		if (TC_H_MIN(baseclass) == 0)
400 			return -EINVAL;
401 	}
402 
403 	if (tb[TCA_FLOW_KEYS]) {
404 		keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);
405 		if (fls(keymask) - 1 > FLOW_KEY_MAX)
406 			return -EOPNOTSUPP;
407 
408 		nkeys = hweight32(keymask);
409 		if (nkeys == 0)
410 			return -EINVAL;
411 	}
412 
413 	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
414 	if (err < 0)
415 		return err;
416 
417 	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
418 	if (err < 0)
419 		goto err1;
420 
421 	f = (struct flow_filter *)*arg;
422 	if (f != NULL) {
423 		err = -EINVAL;
424 		if (f->handle != handle && handle)
425 			goto err2;
426 
427 		mode = f->mode;
428 		if (tb[TCA_FLOW_MODE])
429 			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
430 		if (mode != FLOW_MODE_HASH && nkeys > 1)
431 			goto err2;
432 	} else {
433 		err = -EINVAL;
434 		if (!handle)
435 			goto err2;
436 		if (!tb[TCA_FLOW_KEYS])
437 			goto err2;
438 
439 		mode = FLOW_MODE_MAP;
440 		if (tb[TCA_FLOW_MODE])
441 			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
442 		if (mode != FLOW_MODE_HASH && nkeys > 1)
443 			goto err2;
444 
445 		if (TC_H_MAJ(baseclass) == 0)
446 			baseclass = TC_H_MAKE(tp->q->handle, baseclass);
447 		if (TC_H_MIN(baseclass) == 0)
448 			baseclass = TC_H_MAKE(baseclass, 1);
449 
450 		err = -ENOBUFS;
451 		f = kzalloc(sizeof(*f), GFP_KERNEL);
452 		if (f == NULL)
453 			goto err2;
454 
455 		f->handle = handle;
456 		f->mask	  = ~0U;
457 	}
458 
459 	tcf_exts_change(tp, &f->exts, &e);
460 	tcf_em_tree_change(tp, &f->ematches, &t);
461 
462 	tcf_tree_lock(tp);
463 
464 	if (tb[TCA_FLOW_KEYS]) {
465 		f->keymask = keymask;
466 		f->nkeys   = nkeys;
467 	}
468 
469 	f->mode = mode;
470 
471 	if (tb[TCA_FLOW_MASK])
472 		f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
473 	if (tb[TCA_FLOW_XOR])
474 		f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
475 	if (tb[TCA_FLOW_RSHIFT])
476 		f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
477 	if (tb[TCA_FLOW_ADDEND])
478 		f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
479 
480 	if (tb[TCA_FLOW_DIVISOR])
481 		f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
482 	if (baseclass)
483 		f->baseclass = baseclass;
484 
485 	if (*arg == 0)
486 		list_add_tail(&f->list, &head->filters);
487 
488 	tcf_tree_unlock(tp);
489 
490 	*arg = (unsigned long)f;
491 	return 0;
492 
493 err2:
494 	tcf_em_tree_destroy(tp, &t);
495 err1:
496 	tcf_exts_destroy(tp, &e);
497 	return err;
498 }
499 
500 static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
501 {
502 	tcf_exts_destroy(tp, &f->exts);
503 	tcf_em_tree_destroy(tp, &f->ematches);
504 	kfree(f);
505 }
506 
507 static int flow_delete(struct tcf_proto *tp, unsigned long arg)
508 {
509 	struct flow_filter *f = (struct flow_filter *)arg;
510 
511 	tcf_tree_lock(tp);
512 	list_del(&f->list);
513 	tcf_tree_unlock(tp);
514 	flow_destroy_filter(tp, f);
515 	return 0;
516 }
517 
518 static int flow_init(struct tcf_proto *tp)
519 {
520 	struct flow_head *head;
521 
522 	if (!flow_hashrnd_initted) {
523 		get_random_bytes(&flow_hashrnd, 4);
524 		flow_hashrnd_initted = 1;
525 	}
526 
527 	head = kzalloc(sizeof(*head), GFP_KERNEL);
528 	if (head == NULL)
529 		return -ENOBUFS;
530 	INIT_LIST_HEAD(&head->filters);
531 	tp->root = head;
532 	return 0;
533 }
534 
535 static void flow_destroy(struct tcf_proto *tp)
536 {
537 	struct flow_head *head = tp->root;
538 	struct flow_filter *f, *next;
539 
540 	list_for_each_entry_safe(f, next, &head->filters, list) {
541 		list_del(&f->list);
542 		flow_destroy_filter(tp, f);
543 	}
544 	kfree(head);
545 }
546 
547 static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
548 {
549 	struct flow_head *head = tp->root;
550 	struct flow_filter *f;
551 
552 	list_for_each_entry(f, &head->filters, list)
553 		if (f->handle == handle)
554 			return (unsigned long)f;
555 	return 0;
556 }
557 
/* Counterpart of flow_get(); intentionally does nothing. */
static void flow_put(struct tcf_proto *tp, unsigned long f)
{
}
562 
563 static int flow_dump(struct tcf_proto *tp, unsigned long fh,
564 		     struct sk_buff *skb, struct tcmsg *t)
565 {
566 	struct flow_filter *f = (struct flow_filter *)fh;
567 	struct nlattr *nest;
568 
569 	if (f == NULL)
570 		return skb->len;
571 
572 	t->tcm_handle = f->handle;
573 
574 	nest = nla_nest_start(skb, TCA_OPTIONS);
575 	if (nest == NULL)
576 		goto nla_put_failure;
577 
578 	NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask);
579 	NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode);
580 
581 	if (f->mask != ~0 || f->xor != 0) {
582 		NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask);
583 		NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor);
584 	}
585 	if (f->rshift)
586 		NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift);
587 	if (f->addend)
588 		NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend);
589 
590 	if (f->divisor)
591 		NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor);
592 	if (f->baseclass)
593 		NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);
594 
595 	if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
596 		goto nla_put_failure;
597 
598 	if (f->ematches.hdr.nmatches &&
599 	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
600 		goto nla_put_failure;
601 
602 	nla_nest_end(skb, nest);
603 
604 	if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
605 		goto nla_put_failure;
606 
607 	return skb->len;
608 
609 nla_put_failure:
610 	nlmsg_trim(skb, nest);
611 	return -1;
612 }
613 
614 static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
615 {
616 	struct flow_head *head = tp->root;
617 	struct flow_filter *f;
618 
619 	list_for_each_entry(f, &head->filters, list) {
620 		if (arg->count < arg->skip)
621 			goto skip;
622 		if (arg->fn(tp, (unsigned long)f, arg) < 0) {
623 			arg->stop = 1;
624 			break;
625 		}
626 skip:
627 		arg->count++;
628 	}
629 }
630 
631 static struct tcf_proto_ops cls_flow_ops __read_mostly = {
632 	.kind		= "flow",
633 	.classify	= flow_classify,
634 	.init		= flow_init,
635 	.destroy	= flow_destroy,
636 	.change		= flow_change,
637 	.delete		= flow_delete,
638 	.get		= flow_get,
639 	.put		= flow_put,
640 	.dump		= flow_dump,
641 	.walk		= flow_walk,
642 	.owner		= THIS_MODULE,
643 };
644 
645 static int __init cls_flow_init(void)
646 {
647 	return register_tcf_proto_ops(&cls_flow_ops);
648 }
649 
650 static void __exit cls_flow_exit(void)
651 {
652 	unregister_tcf_proto_ops(&cls_flow_ops);
653 }
654 
655 module_init(cls_flow_init);
656 module_exit(cls_flow_exit);
657 
658 MODULE_LICENSE("GPL");
659 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
660 MODULE_DESCRIPTION("TC flow classifier");
661