1 /* 2 * net/sched/police.c Input police filter. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * J Hadi Salim (action changes) 11 */ 12 13 #include <asm/uaccess.h> 14 #include <asm/system.h> 15 #include <linux/bitops.h> 16 #include <linux/module.h> 17 #include <linux/types.h> 18 #include <linux/kernel.h> 19 #include <linux/sched.h> 20 #include <linux/string.h> 21 #include <linux/mm.h> 22 #include <linux/socket.h> 23 #include <linux/sockios.h> 24 #include <linux/in.h> 25 #include <linux/errno.h> 26 #include <linux/interrupt.h> 27 #include <linux/netdevice.h> 28 #include <linux/skbuff.h> 29 #include <linux/module.h> 30 #include <linux/rtnetlink.h> 31 #include <linux/init.h> 32 #include <net/sock.h> 33 #include <net/act_api.h> 34 35 #define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log]) 36 #define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log]) 37 #define PRIV(a) ((struct tcf_police *) (a)->priv) 38 39 /* use generic hash table */ 40 #define MY_TAB_SIZE 16 41 #define MY_TAB_MASK 15 42 static u32 idx_gen; 43 static struct tcf_police *tcf_police_ht[MY_TAB_SIZE]; 44 /* Policer hash table lock */ 45 static DEFINE_RWLOCK(police_lock); 46 47 /* Each policer is serialized by its individual spinlock */ 48 49 static __inline__ unsigned tcf_police_hash(u32 index) 50 { 51 return index&0xF; 52 } 53 54 static __inline__ struct tcf_police * tcf_police_lookup(u32 index) 55 { 56 struct tcf_police *p; 57 58 read_lock(&police_lock); 59 for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) { 60 if (p->index == index) 61 break; 62 } 63 read_unlock(&police_lock); 64 return p; 65 } 66 67 #ifdef CONFIG_NET_CLS_ACT 68 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, 69 int type, struct tc_action *a) 70 { 71 struct tcf_police *p; 72 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 73 struct rtattr *r; 74 75 read_lock(&police_lock); 76 77 s_i = cb->args[0]; 78 79 for (i = 0; i < MY_TAB_SIZE; i++) { 80 p = tcf_police_ht[tcf_police_hash(i)]; 81 82 for (; p; p = p->next) { 83 index++; 84 if (index < s_i) 85 continue; 86 a->priv = p; 87 a->order = index; 88 r = (struct rtattr*) skb->tail; 89 RTA_PUT(skb, a->order, 0, NULL); 90 if (type == RTM_DELACTION) 91 err = tcf_action_dump_1(skb, a, 0, 1); 92 else 93 err = tcf_action_dump_1(skb, a, 0, 0); 94 if (err < 0) { 95 index--; 96 skb_trim(skb, (u8*)r - skb->data); 97 goto done; 98 } 99 r->rta_len = skb->tail - (u8*)r; 100 n_i++; 101 } 102 } 103 done: 104 read_unlock(&police_lock); 105 if (n_i) 106 cb->args[0] += n_i; 107 return n_i; 108 109 rtattr_failure: 110 skb_trim(skb, (u8*)r - skb->data); 111 goto done; 112 } 113 114 static inline int 115 tcf_act_police_hash_search(struct tc_action *a, u32 index) 116 { 117 struct tcf_police *p = tcf_police_lookup(index); 118 119 if (p != NULL) { 120 a->priv = p; 121 return 1; 122 } else { 123 return 0; 124 } 125 } 126 #endif 127 128 static inline u32 tcf_police_new_index(void) 129 { 130 do { 131 if (++idx_gen == 0) 132 idx_gen = 1; 133 } while (tcf_police_lookup(idx_gen)); 134 135 return idx_gen; 136 } 137 138 void tcf_police_destroy(struct tcf_police *p) 139 { 140 unsigned h = tcf_police_hash(p->index); 141 struct tcf_police **p1p; 142 143 for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { 144 if (*p1p == p) { 145 write_lock_bh(&police_lock); 146 *p1p = p->next; 147 write_unlock_bh(&police_lock); 148 #ifdef CONFIG_NET_ESTIMATOR 149 gen_kill_estimator(&p->bstats, &p->rate_est); 150 #endif 151 if (p->R_tab) 152 qdisc_put_rtab(p->R_tab); 153 if (p->P_tab) 154 qdisc_put_rtab(p->P_tab); 155 kfree(p); 156 return; 157 } 158 } 159 BUG_TRAP(0); 160 } 161 162 #ifdef CONFIG_NET_CLS_ACT 163 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, 164 struct tc_action *a, int ovr, int bind) 165 { 166 unsigned h; 167 int ret = 0, err; 168 struct rtattr *tb[TCA_POLICE_MAX]; 169 struct tc_police *parm; 170 struct tcf_police *p; 171 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 172 173 if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 174 return -EINVAL; 175 176 if (tb[TCA_POLICE_TBF-1] == NULL || 177 RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm)) 178 return -EINVAL; 179 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); 180 181 if (tb[TCA_POLICE_RESULT-1] != NULL && 182 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 183 return -EINVAL; 184 if (tb[TCA_POLICE_RESULT-1] != NULL && 185 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 186 return -EINVAL; 187 188 if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { 189 a->priv = p; 190 if (bind) { 191 p->bindcnt += 1; 192 p->refcnt += 1; 193 } 194 if (ovr) 195 goto override; 196 return ret; 197 } 198 199 p = kmalloc(sizeof(*p), GFP_KERNEL); 200 if (p == NULL) 201 return -ENOMEM; 202 memset(p, 0, sizeof(*p)); 203 204 ret = ACT_P_CREATED; 205 p->refcnt = 1; 206 spin_lock_init(&p->lock); 207 p->stats_lock = &p->lock; 208 if (bind) 209 p->bindcnt = 1; 210 override: 211 if (parm->rate.rate) { 212 err = -ENOMEM; 213 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 214 if (R_tab == NULL) 215 goto failure; 216 if (parm->peakrate.rate) { 217 P_tab = qdisc_get_rtab(&parm->peakrate, 218 tb[TCA_POLICE_PEAKRATE-1]); 219 if (p->P_tab == NULL) { 220 qdisc_put_rtab(R_tab); 221 goto failure; 222 } 223 } 224 } 225 /* No failure allowed after this point */ 226 spin_lock_bh(&p->lock); 227 if (R_tab != NULL) { 228 qdisc_put_rtab(p->R_tab); 229 p->R_tab = R_tab; 230 } 231 if (P_tab != NULL) { 232 qdisc_put_rtab(p->P_tab); 233 p->P_tab = P_tab; 234 } 235 236 if (tb[TCA_POLICE_RESULT-1]) 237 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 238 p->toks = p->burst = parm->burst; 239 p->mtu = parm->mtu; 240 if (p->mtu == 0) { 241 p->mtu = ~0; 242 if (p->R_tab) 243 p->mtu = 255<<p->R_tab->rate.cell_log; 244 } 245 if (p->P_tab) 246 p->ptoks = L2T_P(p, p->mtu); 247 p->action = parm->action; 248 249 #ifdef CONFIG_NET_ESTIMATOR 250 if (tb[TCA_POLICE_AVRATE-1]) 251 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); 252 if (est) 253 gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); 254 #endif 255 256 spin_unlock_bh(&p->lock); 257 if (ret != ACT_P_CREATED) 258 return ret; 259 260 PSCHED_GET_TIME(p->t_c); 261 p->index = parm->index ? : tcf_police_new_index(); 262 h = tcf_police_hash(p->index); 263 write_lock_bh(&police_lock); 264 p->next = tcf_police_ht[h]; 265 tcf_police_ht[h] = p; 266 write_unlock_bh(&police_lock); 267 268 a->priv = p; 269 return ret; 270 271 failure: 272 if (ret == ACT_P_CREATED) 273 kfree(p); 274 return err; 275 } 276 277 static int tcf_act_police_cleanup(struct tc_action *a, int bind) 278 { 279 struct tcf_police *p = PRIV(a); 280 281 if (p != NULL) 282 return tcf_police_release(p, bind); 283 return 0; 284 } 285 286 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, 287 struct tcf_result *res) 288 { 289 psched_time_t now; 290 struct tcf_police *p = PRIV(a); 291 long toks; 292 long ptoks = 0; 293 294 spin_lock(&p->lock); 295 296 p->bstats.bytes += skb->len; 297 p->bstats.packets++; 298 299 #ifdef CONFIG_NET_ESTIMATOR 300 if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { 301 p->qstats.overlimits++; 302 spin_unlock(&p->lock); 303 return p->action; 304 } 305 #endif 306 307 if (skb->len <= p->mtu) { 308 if (p->R_tab == NULL) { 309 spin_unlock(&p->lock); 310 return p->result; 311 } 312 313 PSCHED_GET_TIME(now); 314 315 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); 316 317 if (p->P_tab) { 318 ptoks = toks + p->ptoks; 319 if (ptoks > (long)L2T_P(p, p->mtu)) 320 ptoks = (long)L2T_P(p, p->mtu); 321 ptoks -= L2T_P(p, skb->len); 322 } 323 toks += p->toks; 324 if (toks > (long)p->burst) 325 toks = p->burst; 326 toks -= L2T(p, skb->len); 327 328 if ((toks|ptoks) >= 0) { 329 p->t_c = now; 330 p->toks = toks; 331 p->ptoks = ptoks; 332 spin_unlock(&p->lock); 333 return p->result; 334 } 335 } 336 337 p->qstats.overlimits++; 338 spin_unlock(&p->lock); 339 return p->action; 340 } 341 342 static int 343 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 344 { 345 unsigned char *b = skb->tail; 346 struct tc_police opt; 347 struct tcf_police *p = PRIV(a); 348 349 opt.index = p->index; 350 opt.action = p->action; 351 opt.mtu = p->mtu; 352 opt.burst = p->burst; 353 opt.refcnt = p->refcnt - ref; 354 opt.bindcnt = p->bindcnt - bind; 355 if (p->R_tab) 356 opt.rate = p->R_tab->rate; 357 else 358 memset(&opt.rate, 0, sizeof(opt.rate)); 359 if (p->P_tab) 360 opt.peakrate = p->P_tab->rate; 361 else 362 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 363 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 364 if (p->result) 365 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); 366 #ifdef CONFIG_NET_ESTIMATOR 367 if (p->ewma_rate) 368 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); 369 #endif 370 return skb->len; 371 372 rtattr_failure: 373 skb_trim(skb, b - skb->data); 374 return -1; 375 } 376 377 MODULE_AUTHOR("Alexey Kuznetsov"); 378 MODULE_DESCRIPTION("Policing actions"); 379 MODULE_LICENSE("GPL"); 380 381 static struct tc_action_ops act_police_ops = { 382 .kind = "police", 383 .type = TCA_ID_POLICE, 384 .capab = TCA_CAP_NONE, 385 .owner = THIS_MODULE, 386 .act = tcf_act_police, 387 .dump = tcf_act_police_dump, 388 .cleanup = tcf_act_police_cleanup, 389 .lookup = tcf_act_police_hash_search, 390 .init = tcf_act_police_locate, 391 .walk = tcf_act_police_walker 392 }; 393 394 static int __init 395 police_init_module(void) 396 { 397 return tcf_register_action(&act_police_ops); 398 } 399 400 static void __exit 401 police_cleanup_module(void) 402 { 403 tcf_unregister_action(&act_police_ops); 404 } 405 406 module_init(police_init_module); 407 module_exit(police_cleanup_module); 408 409 #else /* CONFIG_NET_CLS_ACT */ 410 411 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) 412 { 413 unsigned h; 414 struct tcf_police *p; 415 struct rtattr *tb[TCA_POLICE_MAX]; 416 struct tc_police *parm; 417 418 if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 419 return NULL; 420 421 if (tb[TCA_POLICE_TBF-1] == NULL || 422 RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm)) 423 return NULL; 424 425 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); 426 427 if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { 428 p->refcnt++; 429 return p; 430 } 431 432 p = kmalloc(sizeof(*p), GFP_KERNEL); 433 if (p == NULL) 434 return NULL; 435 436 memset(p, 0, sizeof(*p)); 437 p->refcnt = 1; 438 spin_lock_init(&p->lock); 439 p->stats_lock = &p->lock; 440 if (parm->rate.rate) { 441 p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 442 if (p->R_tab == NULL) 443 goto failure; 444 if (parm->peakrate.rate) { 445 p->P_tab = qdisc_get_rtab(&parm->peakrate, 446 tb[TCA_POLICE_PEAKRATE-1]); 447 if (p->P_tab == NULL) 448 goto failure; 449 } 450 } 451 if (tb[TCA_POLICE_RESULT-1]) { 452 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 453 goto failure; 454 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 455 } 456 #ifdef CONFIG_NET_ESTIMATOR 457 if (tb[TCA_POLICE_AVRATE-1]) { 458 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) 459 goto failure; 460 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); 461 } 462 #endif 463 p->toks = p->burst = parm->burst; 464 p->mtu = parm->mtu; 465 if (p->mtu == 0) { 466 p->mtu = ~0; 467 if (p->R_tab) 468 p->mtu = 255<<p->R_tab->rate.cell_log; 469 } 470 if (p->P_tab) 471 p->ptoks = L2T_P(p, p->mtu); 472 PSCHED_GET_TIME(p->t_c); 473 p->index = parm->index ? : tcf_police_new_index(); 474 p->action = parm->action; 475 #ifdef CONFIG_NET_ESTIMATOR 476 if (est) 477 gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); 478 #endif 479 h = tcf_police_hash(p->index); 480 write_lock_bh(&police_lock); 481 p->next = tcf_police_ht[h]; 482 tcf_police_ht[h] = p; 483 write_unlock_bh(&police_lock); 484 return p; 485 486 failure: 487 if (p->R_tab) 488 qdisc_put_rtab(p->R_tab); 489 kfree(p); 490 return NULL; 491 } 492 493 int tcf_police(struct sk_buff *skb, struct tcf_police *p) 494 { 495 psched_time_t now; 496 long toks; 497 long ptoks = 0; 498 499 spin_lock(&p->lock); 500 501 p->bstats.bytes += skb->len; 502 p->bstats.packets++; 503 504 #ifdef CONFIG_NET_ESTIMATOR 505 if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { 506 p->qstats.overlimits++; 507 spin_unlock(&p->lock); 508 return p->action; 509 } 510 #endif 511 512 if (skb->len <= p->mtu) { 513 if (p->R_tab == NULL) { 514 spin_unlock(&p->lock); 515 return p->result; 516 } 517 518 PSCHED_GET_TIME(now); 519 520 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); 521 522 if (p->P_tab) { 523 ptoks = toks + p->ptoks; 524 if (ptoks > (long)L2T_P(p, p->mtu)) 525 ptoks = (long)L2T_P(p, p->mtu); 526 ptoks -= L2T_P(p, skb->len); 527 } 528 toks += p->toks; 529 if (toks > (long)p->burst) 530 toks = p->burst; 531 toks -= L2T(p, skb->len); 532 533 if ((toks|ptoks) >= 0) { 534 p->t_c = now; 535 p->toks = toks; 536 p->ptoks = ptoks; 537 spin_unlock(&p->lock); 538 return p->result; 539 } 540 } 541 542 p->qstats.overlimits++; 543 spin_unlock(&p->lock); 544 return p->action; 545 } 546 EXPORT_SYMBOL(tcf_police); 547 548 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) 549 { 550 unsigned char *b = skb->tail; 551 struct tc_police opt; 552 553 opt.index = p->index; 554 opt.action = p->action; 555 opt.mtu = p->mtu; 556 opt.burst = p->burst; 557 if (p->R_tab) 558 opt.rate = p->R_tab->rate; 559 else 560 memset(&opt.rate, 0, sizeof(opt.rate)); 561 if (p->P_tab) 562 opt.peakrate = p->P_tab->rate; 563 else 564 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 565 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 566 if (p->result) 567 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); 568 #ifdef CONFIG_NET_ESTIMATOR 569 if (p->ewma_rate) 570 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); 571 #endif 572 return skb->len; 573 574 rtattr_failure: 575 skb_trim(skb, b - skb->data); 576 return -1; 577 } 578 579 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p) 580 { 581 struct gnet_dump d; 582 583 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, 584 TCA_XSTATS, p->stats_lock, &d) < 0) 585 goto errout; 586 587 if (gnet_stats_copy_basic(&d, &p->bstats) < 0 || 588 #ifdef CONFIG_NET_ESTIMATOR 589 gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 || 590 #endif 591 gnet_stats_copy_queue(&d, &p->qstats) < 0) 592 goto errout; 593 594 if (gnet_stats_finish_copy(&d) < 0) 595 goto errout; 596 597 return 0; 598 599 errout: 600 return -1; 601 } 602 603 #endif /* CONFIG_NET_CLS_ACT */ 604