xref: /linux/net/sched/act_police.c (revision f3d9478b2ce468c3115b02ecae7e975990697f15)
1 /*
2  * net/sched/police.c	Input police filter.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  * 		J Hadi Salim (action changes)
11  */
12 
13 #include <asm/uaccess.h>
14 #include <asm/system.h>
15 #include <linux/bitops.h>
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/module.h>
31 #include <linux/rtnetlink.h>
32 #include <linux/init.h>
33 #include <net/sock.h>
34 #include <net/act_api.h>
35 
/*
 * Rate-table lookups: index the table's data[] with the given length
 * shifted right by that table's rate.cell_log.  L2T reads through the
 * policer's R_tab pointer, L2T_P through its P_tab pointer.
 */
#define L2T(pol, len)	((pol)->R_tab->data[(len) >> (pol)->R_tab->rate.cell_log])
#define L2T_P(pol, len)	((pol)->P_tab->data[(len) >> (pol)->P_tab->rate.cell_log])
/* View an action's ->priv pointer as a struct tcf_police pointer. */
#define PRIV(act)	((struct tcf_police *) (act)->priv)
39 
40 /* use generic hash table */
41 #define MY_TAB_SIZE     16
42 #define MY_TAB_MASK     15
43 static u32 idx_gen;
44 static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
45 /* Policer hash table lock */
46 static DEFINE_RWLOCK(police_lock);
47 
48 /* Each policer is serialized by its individual spinlock */
49 
50 static __inline__ unsigned tcf_police_hash(u32 index)
51 {
52 	return index&0xF;
53 }
54 
55 static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
56 {
57 	struct tcf_police *p;
58 
59 	read_lock(&police_lock);
60 	for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
61 		if (p->index == index)
62 			break;
63 	}
64 	read_unlock(&police_lock);
65 	return p;
66 }
67 
68 #ifdef CONFIG_NET_CLS_ACT
69 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
70                               int type, struct tc_action *a)
71 {
72 	struct tcf_police *p;
73 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
74 	struct rtattr *r;
75 
76 	read_lock(&police_lock);
77 
78 	s_i = cb->args[0];
79 
80 	for (i = 0; i < MY_TAB_SIZE; i++) {
81 		p = tcf_police_ht[tcf_police_hash(i)];
82 
83 		for (; p; p = p->next) {
84 			index++;
85 			if (index < s_i)
86 				continue;
87 			a->priv = p;
88 			a->order = index;
89 			r = (struct rtattr*) skb->tail;
90 			RTA_PUT(skb, a->order, 0, NULL);
91 			if (type == RTM_DELACTION)
92 				err = tcf_action_dump_1(skb, a, 0, 1);
93 			else
94 				err = tcf_action_dump_1(skb, a, 0, 0);
95 			if (err < 0) {
96 				index--;
97 				skb_trim(skb, (u8*)r - skb->data);
98 				goto done;
99 			}
100 			r->rta_len = skb->tail - (u8*)r;
101 			n_i++;
102 		}
103 	}
104 done:
105 	read_unlock(&police_lock);
106 	if (n_i)
107 		cb->args[0] += n_i;
108 	return n_i;
109 
110 rtattr_failure:
111 	skb_trim(skb, (u8*)r - skb->data);
112 	goto done;
113 }
114 
115 static inline int
116 tcf_act_police_hash_search(struct tc_action *a, u32 index)
117 {
118 	struct tcf_police *p = tcf_police_lookup(index);
119 
120 	if (p != NULL) {
121 		a->priv = p;
122 		return 1;
123 	} else {
124 		return 0;
125 	}
126 }
127 #endif
128 
129 static inline u32 tcf_police_new_index(void)
130 {
131 	do {
132 		if (++idx_gen == 0)
133 			idx_gen = 1;
134 	} while (tcf_police_lookup(idx_gen));
135 
136 	return idx_gen;
137 }
138 
139 void tcf_police_destroy(struct tcf_police *p)
140 {
141 	unsigned h = tcf_police_hash(p->index);
142 	struct tcf_police **p1p;
143 
144 	for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
145 		if (*p1p == p) {
146 			write_lock_bh(&police_lock);
147 			*p1p = p->next;
148 			write_unlock_bh(&police_lock);
149 #ifdef CONFIG_NET_ESTIMATOR
150 			gen_kill_estimator(&p->bstats, &p->rate_est);
151 #endif
152 			if (p->R_tab)
153 				qdisc_put_rtab(p->R_tab);
154 			if (p->P_tab)
155 				qdisc_put_rtab(p->P_tab);
156 			kfree(p);
157 			return;
158 		}
159 	}
160 	BUG_TRAP(0);
161 }
162 
163 #ifdef CONFIG_NET_CLS_ACT
164 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
165                                  struct tc_action *a, int ovr, int bind)
166 {
167 	unsigned h;
168 	int ret = 0, err;
169 	struct rtattr *tb[TCA_POLICE_MAX];
170 	struct tc_police *parm;
171 	struct tcf_police *p;
172 	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
173 
174 	if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
175 		return -EINVAL;
176 
177 	if (tb[TCA_POLICE_TBF-1] == NULL ||
178 	    RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
179 		return -EINVAL;
180 	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
181 
182 	if (tb[TCA_POLICE_RESULT-1] != NULL &&
183 	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
184 		return -EINVAL;
185 	if (tb[TCA_POLICE_RESULT-1] != NULL &&
186 	    RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
187 		return -EINVAL;
188 
189 	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
190 		a->priv = p;
191 		if (bind) {
192 			p->bindcnt += 1;
193 			p->refcnt += 1;
194 		}
195 		if (ovr)
196 			goto override;
197 		return ret;
198 	}
199 
200 	p = kmalloc(sizeof(*p), GFP_KERNEL);
201 	if (p == NULL)
202 		return -ENOMEM;
203 	memset(p, 0, sizeof(*p));
204 
205 	ret = ACT_P_CREATED;
206 	p->refcnt = 1;
207 	spin_lock_init(&p->lock);
208 	p->stats_lock = &p->lock;
209 	if (bind)
210 		p->bindcnt = 1;
211 override:
212 	if (parm->rate.rate) {
213 		err = -ENOMEM;
214 		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
215 		if (R_tab == NULL)
216 			goto failure;
217 		if (parm->peakrate.rate) {
218 			P_tab = qdisc_get_rtab(&parm->peakrate,
219 					       tb[TCA_POLICE_PEAKRATE-1]);
220 			if (p->P_tab == NULL) {
221 				qdisc_put_rtab(R_tab);
222 				goto failure;
223 			}
224 		}
225 	}
226 	/* No failure allowed after this point */
227 	spin_lock_bh(&p->lock);
228 	if (R_tab != NULL) {
229 		qdisc_put_rtab(p->R_tab);
230 		p->R_tab = R_tab;
231 	}
232 	if (P_tab != NULL) {
233 		qdisc_put_rtab(p->P_tab);
234 		p->P_tab = P_tab;
235 	}
236 
237 	if (tb[TCA_POLICE_RESULT-1])
238 		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
239 	p->toks = p->burst = parm->burst;
240 	p->mtu = parm->mtu;
241 	if (p->mtu == 0) {
242 		p->mtu = ~0;
243 		if (p->R_tab)
244 			p->mtu = 255<<p->R_tab->rate.cell_log;
245 	}
246 	if (p->P_tab)
247 		p->ptoks = L2T_P(p, p->mtu);
248 	p->action = parm->action;
249 
250 #ifdef CONFIG_NET_ESTIMATOR
251 	if (tb[TCA_POLICE_AVRATE-1])
252 		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
253 	if (est)
254 		gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
255 #endif
256 
257 	spin_unlock_bh(&p->lock);
258 	if (ret != ACT_P_CREATED)
259 		return ret;
260 
261 	PSCHED_GET_TIME(p->t_c);
262 	p->index = parm->index ? : tcf_police_new_index();
263 	h = tcf_police_hash(p->index);
264 	write_lock_bh(&police_lock);
265 	p->next = tcf_police_ht[h];
266 	tcf_police_ht[h] = p;
267 	write_unlock_bh(&police_lock);
268 
269 	a->priv = p;
270 	return ret;
271 
272 failure:
273 	if (ret == ACT_P_CREATED)
274 		kfree(p);
275 	return err;
276 }
277 
/*
 * Release the policer attached to @a, if there is one.
 *
 * Returns whatever tcf_police_release() returns for the attached policer,
 * or 0 when a->priv is NULL.
 */
static int tcf_act_police_cleanup(struct tc_action *a, int bind)
{
	struct tcf_police *p = PRIV(a);

	return p ? tcf_police_release(p, bind) : 0;
}
286 
287 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
288                           struct tcf_result *res)
289 {
290 	psched_time_t now;
291 	struct tcf_police *p = PRIV(a);
292 	long toks;
293 	long ptoks = 0;
294 
295 	spin_lock(&p->lock);
296 
297 	p->bstats.bytes += skb->len;
298 	p->bstats.packets++;
299 
300 #ifdef CONFIG_NET_ESTIMATOR
301 	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
302 		p->qstats.overlimits++;
303 		spin_unlock(&p->lock);
304 		return p->action;
305 	}
306 #endif
307 
308 	if (skb->len <= p->mtu) {
309 		if (p->R_tab == NULL) {
310 			spin_unlock(&p->lock);
311 			return p->result;
312 		}
313 
314 		PSCHED_GET_TIME(now);
315 
316 		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
317 
318 		if (p->P_tab) {
319 			ptoks = toks + p->ptoks;
320 			if (ptoks > (long)L2T_P(p, p->mtu))
321 				ptoks = (long)L2T_P(p, p->mtu);
322 			ptoks -= L2T_P(p, skb->len);
323 		}
324 		toks += p->toks;
325 		if (toks > (long)p->burst)
326 			toks = p->burst;
327 		toks -= L2T(p, skb->len);
328 
329 		if ((toks|ptoks) >= 0) {
330 			p->t_c = now;
331 			p->toks = toks;
332 			p->ptoks = ptoks;
333 			spin_unlock(&p->lock);
334 			return p->result;
335 		}
336 	}
337 
338 	p->qstats.overlimits++;
339 	spin_unlock(&p->lock);
340 	return p->action;
341 }
342 
343 static int
344 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
345 {
346 	unsigned char	 *b = skb->tail;
347 	struct tc_police opt;
348 	struct tcf_police *p = PRIV(a);
349 
350 	opt.index = p->index;
351 	opt.action = p->action;
352 	opt.mtu = p->mtu;
353 	opt.burst = p->burst;
354 	opt.refcnt = p->refcnt - ref;
355 	opt.bindcnt = p->bindcnt - bind;
356 	if (p->R_tab)
357 		opt.rate = p->R_tab->rate;
358 	else
359 		memset(&opt.rate, 0, sizeof(opt.rate));
360 	if (p->P_tab)
361 		opt.peakrate = p->P_tab->rate;
362 	else
363 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
364 	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
365 	if (p->result)
366 		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
367 #ifdef CONFIG_NET_ESTIMATOR
368 	if (p->ewma_rate)
369 		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
370 #endif
371 	return skb->len;
372 
373 rtattr_failure:
374 	skb_trim(skb, b - skb->data);
375 	return -1;
376 }
377 
378 MODULE_AUTHOR("Alexey Kuznetsov");
379 MODULE_DESCRIPTION("Policing actions");
380 MODULE_LICENSE("GPL");
381 
382 static struct tc_action_ops act_police_ops = {
383 	.kind		=	"police",
384 	.type		=	TCA_ID_POLICE,
385 	.capab		=	TCA_CAP_NONE,
386 	.owner		=	THIS_MODULE,
387 	.act		=	tcf_act_police,
388 	.dump		=	tcf_act_police_dump,
389 	.cleanup	=	tcf_act_police_cleanup,
390 	.lookup		=	tcf_act_police_hash_search,
391 	.init		=	tcf_act_police_locate,
392 	.walk		=	tcf_act_police_walker
393 };
394 
395 static int __init
396 police_init_module(void)
397 {
398 	return tcf_register_action(&act_police_ops);
399 }
400 
401 static void __exit
402 police_cleanup_module(void)
403 {
404 	tcf_unregister_action(&act_police_ops);
405 }
406 
407 module_init(police_init_module);
408 module_exit(police_cleanup_module);
409 
410 #else /* CONFIG_NET_CLS_ACT */
411 
412 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
413 {
414 	unsigned h;
415 	struct tcf_police *p;
416 	struct rtattr *tb[TCA_POLICE_MAX];
417 	struct tc_police *parm;
418 
419 	if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
420 		return NULL;
421 
422 	if (tb[TCA_POLICE_TBF-1] == NULL ||
423 	    RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
424 		return NULL;
425 
426 	parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
427 
428 	if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
429 		p->refcnt++;
430 		return p;
431 	}
432 
433 	p = kmalloc(sizeof(*p), GFP_KERNEL);
434 	if (p == NULL)
435 		return NULL;
436 
437 	memset(p, 0, sizeof(*p));
438 	p->refcnt = 1;
439 	spin_lock_init(&p->lock);
440 	p->stats_lock = &p->lock;
441 	if (parm->rate.rate) {
442 		p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
443 		if (p->R_tab == NULL)
444 			goto failure;
445 		if (parm->peakrate.rate) {
446 			p->P_tab = qdisc_get_rtab(&parm->peakrate,
447 			                          tb[TCA_POLICE_PEAKRATE-1]);
448 			if (p->P_tab == NULL)
449 				goto failure;
450 		}
451 	}
452 	if (tb[TCA_POLICE_RESULT-1]) {
453 		if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
454 			goto failure;
455 		p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
456 	}
457 #ifdef CONFIG_NET_ESTIMATOR
458 	if (tb[TCA_POLICE_AVRATE-1]) {
459 		if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
460 			goto failure;
461 		p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
462 	}
463 #endif
464 	p->toks = p->burst = parm->burst;
465 	p->mtu = parm->mtu;
466 	if (p->mtu == 0) {
467 		p->mtu = ~0;
468 		if (p->R_tab)
469 			p->mtu = 255<<p->R_tab->rate.cell_log;
470 	}
471 	if (p->P_tab)
472 		p->ptoks = L2T_P(p, p->mtu);
473 	PSCHED_GET_TIME(p->t_c);
474 	p->index = parm->index ? : tcf_police_new_index();
475 	p->action = parm->action;
476 #ifdef CONFIG_NET_ESTIMATOR
477 	if (est)
478 		gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
479 #endif
480 	h = tcf_police_hash(p->index);
481 	write_lock_bh(&police_lock);
482 	p->next = tcf_police_ht[h];
483 	tcf_police_ht[h] = p;
484 	write_unlock_bh(&police_lock);
485 	return p;
486 
487 failure:
488 	if (p->R_tab)
489 		qdisc_put_rtab(p->R_tab);
490 	kfree(p);
491 	return NULL;
492 }
493 
494 int tcf_police(struct sk_buff *skb, struct tcf_police *p)
495 {
496 	psched_time_t now;
497 	long toks;
498 	long ptoks = 0;
499 
500 	spin_lock(&p->lock);
501 
502 	p->bstats.bytes += skb->len;
503 	p->bstats.packets++;
504 
505 #ifdef CONFIG_NET_ESTIMATOR
506 	if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
507 		p->qstats.overlimits++;
508 		spin_unlock(&p->lock);
509 		return p->action;
510 	}
511 #endif
512 
513 	if (skb->len <= p->mtu) {
514 		if (p->R_tab == NULL) {
515 			spin_unlock(&p->lock);
516 			return p->result;
517 		}
518 
519 		PSCHED_GET_TIME(now);
520 
521 		toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
522 
523 		if (p->P_tab) {
524 			ptoks = toks + p->ptoks;
525 			if (ptoks > (long)L2T_P(p, p->mtu))
526 				ptoks = (long)L2T_P(p, p->mtu);
527 			ptoks -= L2T_P(p, skb->len);
528 		}
529 		toks += p->toks;
530 		if (toks > (long)p->burst)
531 			toks = p->burst;
532 		toks -= L2T(p, skb->len);
533 
534 		if ((toks|ptoks) >= 0) {
535 			p->t_c = now;
536 			p->toks = toks;
537 			p->ptoks = ptoks;
538 			spin_unlock(&p->lock);
539 			return p->result;
540 		}
541 	}
542 
543 	p->qstats.overlimits++;
544 	spin_unlock(&p->lock);
545 	return p->action;
546 }
547 EXPORT_SYMBOL(tcf_police);
548 
549 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
550 {
551 	unsigned char	 *b = skb->tail;
552 	struct tc_police opt;
553 
554 	opt.index = p->index;
555 	opt.action = p->action;
556 	opt.mtu = p->mtu;
557 	opt.burst = p->burst;
558 	if (p->R_tab)
559 		opt.rate = p->R_tab->rate;
560 	else
561 		memset(&opt.rate, 0, sizeof(opt.rate));
562 	if (p->P_tab)
563 		opt.peakrate = p->P_tab->rate;
564 	else
565 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
566 	RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
567 	if (p->result)
568 		RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
569 #ifdef CONFIG_NET_ESTIMATOR
570 	if (p->ewma_rate)
571 		RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
572 #endif
573 	return skb->len;
574 
575 rtattr_failure:
576 	skb_trim(skb, b - skb->data);
577 	return -1;
578 }
579 
580 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
581 {
582 	struct gnet_dump d;
583 
584 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
585 			TCA_XSTATS, p->stats_lock, &d) < 0)
586 		goto errout;
587 
588 	if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
589 #ifdef CONFIG_NET_ESTIMATOR
590 	    gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
591 #endif
592 	    gnet_stats_copy_queue(&d, &p->qstats) < 0)
593 		goto errout;
594 
595 	if (gnet_stats_finish_copy(&d) < 0)
596 		goto errout;
597 
598 	return 0;
599 
600 errout:
601 	return -1;
602 }
603 
604 #endif /* CONFIG_NET_CLS_ACT */
605