1 /* 2 * net/sched/police.c Input police filter. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * J Hadi Salim (action changes) 11 */ 12 13 #include <asm/uaccess.h> 14 #include <asm/system.h> 15 #include <linux/bitops.h> 16 #include <linux/config.h> 17 #include <linux/module.h> 18 #include <linux/types.h> 19 #include <linux/kernel.h> 20 #include <linux/sched.h> 21 #include <linux/string.h> 22 #include <linux/mm.h> 23 #include <linux/socket.h> 24 #include <linux/sockios.h> 25 #include <linux/in.h> 26 #include <linux/errno.h> 27 #include <linux/interrupt.h> 28 #include <linux/netdevice.h> 29 #include <linux/skbuff.h> 30 #include <linux/module.h> 31 #include <linux/rtnetlink.h> 32 #include <linux/init.h> 33 #include <net/sock.h> 34 #include <net/act_api.h> 35 36 #define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log]) 37 #define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log]) 38 #define PRIV(a) ((struct tcf_police *) (a)->priv) 39 40 /* use generic hash table */ 41 #define MY_TAB_SIZE 16 42 #define MY_TAB_MASK 15 43 static u32 idx_gen; 44 static struct tcf_police *tcf_police_ht[MY_TAB_SIZE]; 45 /* Policer hash table lock */ 46 static DEFINE_RWLOCK(police_lock); 47 48 /* Each policer is serialized by its individual spinlock */ 49 50 static __inline__ unsigned tcf_police_hash(u32 index) 51 { 52 return index&0xF; 53 } 54 55 static __inline__ struct tcf_police * tcf_police_lookup(u32 index) 56 { 57 struct tcf_police *p; 58 59 read_lock(&police_lock); 60 for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) { 61 if (p->index == index) 62 break; 63 } 64 read_unlock(&police_lock); 65 return p; 66 } 67 68 #ifdef CONFIG_NET_CLS_ACT 69 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, 70 int type, struct tc_action *a) 71 { 72 struct tcf_police *p; 73 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 74 struct rtattr *r; 75 76 read_lock(&police_lock); 77 78 s_i = cb->args[0]; 79 80 for (i = 0; i < MY_TAB_SIZE; i++) { 81 p = tcf_police_ht[tcf_police_hash(i)]; 82 83 for (; p; p = p->next) { 84 index++; 85 if (index < s_i) 86 continue; 87 a->priv = p; 88 a->order = index; 89 r = (struct rtattr*) skb->tail; 90 RTA_PUT(skb, a->order, 0, NULL); 91 if (type == RTM_DELACTION) 92 err = tcf_action_dump_1(skb, a, 0, 1); 93 else 94 err = tcf_action_dump_1(skb, a, 0, 0); 95 if (err < 0) { 96 index--; 97 skb_trim(skb, (u8*)r - skb->data); 98 goto done; 99 } 100 r->rta_len = skb->tail - (u8*)r; 101 n_i++; 102 } 103 } 104 done: 105 read_unlock(&police_lock); 106 if (n_i) 107 cb->args[0] += n_i; 108 return n_i; 109 110 rtattr_failure: 111 skb_trim(skb, (u8*)r - skb->data); 112 goto done; 113 } 114 115 static inline int 116 tcf_act_police_hash_search(struct tc_action *a, u32 index) 117 { 118 struct tcf_police *p = tcf_police_lookup(index); 119 120 if (p != NULL) { 121 a->priv = p; 122 return 1; 123 } else { 124 return 0; 125 } 126 } 127 #endif 128 129 static inline u32 tcf_police_new_index(void) 130 { 131 do { 132 if (++idx_gen == 0) 133 idx_gen = 1; 134 } while (tcf_police_lookup(idx_gen)); 135 136 return idx_gen; 137 } 138 139 void tcf_police_destroy(struct tcf_police *p) 140 { 141 unsigned h = tcf_police_hash(p->index); 142 struct tcf_police **p1p; 143 144 for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { 145 if (*p1p == p) { 146 write_lock_bh(&police_lock); 147 *p1p = p->next; 148 write_unlock_bh(&police_lock); 149 #ifdef CONFIG_NET_ESTIMATOR 150 gen_kill_estimator(&p->bstats, &p->rate_est); 151 #endif 152 if (p->R_tab) 153 qdisc_put_rtab(p->R_tab); 154 if (p->P_tab) 155 qdisc_put_rtab(p->P_tab); 156 kfree(p); 157 return; 158 } 159 } 160 BUG_TRAP(0); 161 } 162 163 #ifdef CONFIG_NET_CLS_ACT 164 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, 165 struct tc_action *a, int ovr, int bind) 166 { 167 unsigned h; 168 int ret = 0, err; 169 struct rtattr *tb[TCA_POLICE_MAX]; 170 struct tc_police *parm; 171 struct tcf_police *p; 172 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 173 174 if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 175 return -EINVAL; 176 177 if (tb[TCA_POLICE_TBF-1] == NULL || 178 RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm)) 179 return -EINVAL; 180 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); 181 182 if (tb[TCA_POLICE_RESULT-1] != NULL && 183 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 184 return -EINVAL; 185 if (tb[TCA_POLICE_RESULT-1] != NULL && 186 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 187 return -EINVAL; 188 189 if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { 190 a->priv = p; 191 if (bind) { 192 p->bindcnt += 1; 193 p->refcnt += 1; 194 } 195 if (ovr) 196 goto override; 197 return ret; 198 } 199 200 p = kmalloc(sizeof(*p), GFP_KERNEL); 201 if (p == NULL) 202 return -ENOMEM; 203 memset(p, 0, sizeof(*p)); 204 205 ret = ACT_P_CREATED; 206 p->refcnt = 1; 207 spin_lock_init(&p->lock); 208 p->stats_lock = &p->lock; 209 if (bind) 210 p->bindcnt = 1; 211 override: 212 if (parm->rate.rate) { 213 err = -ENOMEM; 214 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 215 if (R_tab == NULL) 216 goto failure; 217 if (parm->peakrate.rate) { 218 P_tab = qdisc_get_rtab(&parm->peakrate, 219 tb[TCA_POLICE_PEAKRATE-1]); 220 if (p->P_tab == NULL) { 221 qdisc_put_rtab(R_tab); 222 goto failure; 223 } 224 } 225 } 226 /* No failure allowed after this point */ 227 spin_lock_bh(&p->lock); 228 if (R_tab != NULL) { 229 qdisc_put_rtab(p->R_tab); 230 p->R_tab = R_tab; 231 } 232 if (P_tab != NULL) { 233 qdisc_put_rtab(p->P_tab); 234 p->P_tab = P_tab; 235 } 236 237 if (tb[TCA_POLICE_RESULT-1]) 238 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 239 p->toks = p->burst = parm->burst; 240 p->mtu = parm->mtu; 241 if (p->mtu == 0) { 242 p->mtu = ~0; 243 if (p->R_tab) 244 p->mtu = 255<<p->R_tab->rate.cell_log; 245 } 246 if (p->P_tab) 247 p->ptoks = L2T_P(p, p->mtu); 248 p->action = parm->action; 249 250 #ifdef CONFIG_NET_ESTIMATOR 251 if (tb[TCA_POLICE_AVRATE-1]) 252 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); 253 if (est) 254 gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); 255 #endif 256 257 spin_unlock_bh(&p->lock); 258 if (ret != ACT_P_CREATED) 259 return ret; 260 261 PSCHED_GET_TIME(p->t_c); 262 p->index = parm->index ? : tcf_police_new_index(); 263 h = tcf_police_hash(p->index); 264 write_lock_bh(&police_lock); 265 p->next = tcf_police_ht[h]; 266 tcf_police_ht[h] = p; 267 write_unlock_bh(&police_lock); 268 269 a->priv = p; 270 return ret; 271 272 failure: 273 if (ret == ACT_P_CREATED) 274 kfree(p); 275 return err; 276 } 277 278 static int tcf_act_police_cleanup(struct tc_action *a, int bind) 279 { 280 struct tcf_police *p = PRIV(a); 281 282 if (p != NULL) 283 return tcf_police_release(p, bind); 284 return 0; 285 } 286 287 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, 288 struct tcf_result *res) 289 { 290 psched_time_t now; 291 struct tcf_police *p = PRIV(a); 292 long toks; 293 long ptoks = 0; 294 295 spin_lock(&p->lock); 296 297 p->bstats.bytes += skb->len; 298 p->bstats.packets++; 299 300 #ifdef CONFIG_NET_ESTIMATOR 301 if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { 302 p->qstats.overlimits++; 303 spin_unlock(&p->lock); 304 return p->action; 305 } 306 #endif 307 308 if (skb->len <= p->mtu) { 309 if (p->R_tab == NULL) { 310 spin_unlock(&p->lock); 311 return p->result; 312 } 313 314 PSCHED_GET_TIME(now); 315 316 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); 317 318 if (p->P_tab) { 319 ptoks = toks + p->ptoks; 320 if (ptoks > (long)L2T_P(p, p->mtu)) 321 ptoks = (long)L2T_P(p, p->mtu); 322 ptoks -= L2T_P(p, skb->len); 323 } 324 toks += p->toks; 325 if (toks > (long)p->burst) 326 toks = p->burst; 327 toks -= L2T(p, skb->len); 328 329 if ((toks|ptoks) >= 0) { 330 p->t_c = now; 331 p->toks = toks; 332 p->ptoks = ptoks; 333 spin_unlock(&p->lock); 334 return p->result; 335 } 336 } 337 338 p->qstats.overlimits++; 339 spin_unlock(&p->lock); 340 return p->action; 341 } 342 343 static int 344 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 345 { 346 unsigned char *b = skb->tail; 347 struct tc_police opt; 348 struct tcf_police *p = PRIV(a); 349 350 opt.index = p->index; 351 opt.action = p->action; 352 opt.mtu = p->mtu; 353 opt.burst = p->burst; 354 opt.refcnt = p->refcnt - ref; 355 opt.bindcnt = p->bindcnt - bind; 356 if (p->R_tab) 357 opt.rate = p->R_tab->rate; 358 else 359 memset(&opt.rate, 0, sizeof(opt.rate)); 360 if (p->P_tab) 361 opt.peakrate = p->P_tab->rate; 362 else 363 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 364 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 365 if (p->result) 366 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); 367 #ifdef CONFIG_NET_ESTIMATOR 368 if (p->ewma_rate) 369 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); 370 #endif 371 return skb->len; 372 373 rtattr_failure: 374 skb_trim(skb, b - skb->data); 375 return -1; 376 } 377 378 MODULE_AUTHOR("Alexey Kuznetsov"); 379 MODULE_DESCRIPTION("Policing actions"); 380 MODULE_LICENSE("GPL"); 381 382 static struct tc_action_ops act_police_ops = { 383 .kind = "police", 384 .type = TCA_ID_POLICE, 385 .capab = TCA_CAP_NONE, 386 .owner = THIS_MODULE, 387 .act = tcf_act_police, 388 .dump = tcf_act_police_dump, 389 .cleanup = tcf_act_police_cleanup, 390 .lookup = tcf_act_police_hash_search, 391 .init = tcf_act_police_locate, 392 .walk = tcf_act_police_walker 393 }; 394 395 static int __init 396 police_init_module(void) 397 { 398 return tcf_register_action(&act_police_ops); 399 } 400 401 static void __exit 402 police_cleanup_module(void) 403 { 404 tcf_unregister_action(&act_police_ops); 405 } 406 407 module_init(police_init_module); 408 module_exit(police_cleanup_module); 409 410 #else /* CONFIG_NET_CLS_ACT */ 411 412 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) 413 { 414 unsigned h; 415 struct tcf_police *p; 416 struct rtattr *tb[TCA_POLICE_MAX]; 417 struct tc_police *parm; 418 419 if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 420 return NULL; 421 422 if (tb[TCA_POLICE_TBF-1] == NULL || 423 RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm)) 424 return NULL; 425 426 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); 427 428 if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { 429 p->refcnt++; 430 return p; 431 } 432 433 p = kmalloc(sizeof(*p), GFP_KERNEL); 434 if (p == NULL) 435 return NULL; 436 437 memset(p, 0, sizeof(*p)); 438 p->refcnt = 1; 439 spin_lock_init(&p->lock); 440 p->stats_lock = &p->lock; 441 if (parm->rate.rate) { 442 p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 443 if (p->R_tab == NULL) 444 goto failure; 445 if (parm->peakrate.rate) { 446 p->P_tab = qdisc_get_rtab(&parm->peakrate, 447 tb[TCA_POLICE_PEAKRATE-1]); 448 if (p->P_tab == NULL) 449 goto failure; 450 } 451 } 452 if (tb[TCA_POLICE_RESULT-1]) { 453 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 454 goto failure; 455 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 456 } 457 #ifdef CONFIG_NET_ESTIMATOR 458 if (tb[TCA_POLICE_AVRATE-1]) { 459 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) 460 goto failure; 461 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); 462 } 463 #endif 464 p->toks = p->burst = parm->burst; 465 p->mtu = parm->mtu; 466 if (p->mtu == 0) { 467 p->mtu = ~0; 468 if (p->R_tab) 469 p->mtu = 255<<p->R_tab->rate.cell_log; 470 } 471 if (p->P_tab) 472 p->ptoks = L2T_P(p, p->mtu); 473 PSCHED_GET_TIME(p->t_c); 474 p->index = parm->index ? : tcf_police_new_index(); 475 p->action = parm->action; 476 #ifdef CONFIG_NET_ESTIMATOR 477 if (est) 478 gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); 479 #endif 480 h = tcf_police_hash(p->index); 481 write_lock_bh(&police_lock); 482 p->next = tcf_police_ht[h]; 483 tcf_police_ht[h] = p; 484 write_unlock_bh(&police_lock); 485 return p; 486 487 failure: 488 if (p->R_tab) 489 qdisc_put_rtab(p->R_tab); 490 kfree(p); 491 return NULL; 492 } 493 494 int tcf_police(struct sk_buff *skb, struct tcf_police *p) 495 { 496 psched_time_t now; 497 long toks; 498 long ptoks = 0; 499 500 spin_lock(&p->lock); 501 502 p->bstats.bytes += skb->len; 503 p->bstats.packets++; 504 505 #ifdef CONFIG_NET_ESTIMATOR 506 if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { 507 p->qstats.overlimits++; 508 spin_unlock(&p->lock); 509 return p->action; 510 } 511 #endif 512 513 if (skb->len <= p->mtu) { 514 if (p->R_tab == NULL) { 515 spin_unlock(&p->lock); 516 return p->result; 517 } 518 519 PSCHED_GET_TIME(now); 520 521 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); 522 523 if (p->P_tab) { 524 ptoks = toks + p->ptoks; 525 if (ptoks > (long)L2T_P(p, p->mtu)) 526 ptoks = (long)L2T_P(p, p->mtu); 527 ptoks -= L2T_P(p, skb->len); 528 } 529 toks += p->toks; 530 if (toks > (long)p->burst) 531 toks = p->burst; 532 toks -= L2T(p, skb->len); 533 534 if ((toks|ptoks) >= 0) { 535 p->t_c = now; 536 p->toks = toks; 537 p->ptoks = ptoks; 538 spin_unlock(&p->lock); 539 return p->result; 540 } 541 } 542 543 p->qstats.overlimits++; 544 spin_unlock(&p->lock); 545 return p->action; 546 } 547 EXPORT_SYMBOL(tcf_police); 548 549 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) 550 { 551 unsigned char *b = skb->tail; 552 struct tc_police opt; 553 554 opt.index = p->index; 555 opt.action = p->action; 556 opt.mtu = p->mtu; 557 opt.burst = p->burst; 558 if (p->R_tab) 559 opt.rate = p->R_tab->rate; 560 else 561 memset(&opt.rate, 0, sizeof(opt.rate)); 562 if (p->P_tab) 563 opt.peakrate = p->P_tab->rate; 564 else 565 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 566 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 567 if (p->result) 568 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); 569 #ifdef CONFIG_NET_ESTIMATOR 570 if (p->ewma_rate) 571 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); 572 #endif 573 return skb->len; 574 575 rtattr_failure: 576 skb_trim(skb, b - skb->data); 577 return -1; 578 } 579 580 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p) 581 { 582 struct gnet_dump d; 583 584 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, 585 TCA_XSTATS, p->stats_lock, &d) < 0) 586 goto errout; 587 588 if (gnet_stats_copy_basic(&d, &p->bstats) < 0 || 589 #ifdef CONFIG_NET_ESTIMATOR 590 gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 || 591 #endif 592 gnet_stats_copy_queue(&d, &p->qstats) < 0) 593 goto errout; 594 595 if (gnet_stats_finish_copy(&d) < 0) 596 goto errout; 597 598 return 0; 599 600 errout: 601 return -1; 602 } 603 604 #endif /* CONFIG_NET_CLS_ACT */ 605