1 /* 2 * net/sched/police.c Input police filter. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * J Hadi Salim (action changes) 11 */ 12 13 #include <asm/uaccess.h> 14 #include <asm/system.h> 15 #include <linux/bitops.h> 16 #include <linux/module.h> 17 #include <linux/types.h> 18 #include <linux/kernel.h> 19 #include <linux/string.h> 20 #include <linux/mm.h> 21 #include <linux/socket.h> 22 #include <linux/sockios.h> 23 #include <linux/in.h> 24 #include <linux/errno.h> 25 #include <linux/interrupt.h> 26 #include <linux/netdevice.h> 27 #include <linux/skbuff.h> 28 #include <linux/module.h> 29 #include <linux/rtnetlink.h> 30 #include <linux/init.h> 31 #include <net/sock.h> 32 #include <net/act_api.h> 33 34 #define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) 35 #define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) 36 37 #define POL_TAB_MASK 15 38 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; 39 static u32 police_idx_gen; 40 static DEFINE_RWLOCK(police_lock); 41 42 static struct tcf_hashinfo police_hash_info = { 43 .htab = tcf_police_ht, 44 .hmask = POL_TAB_MASK, 45 .lock = &police_lock, 46 }; 47 48 /* old policer structure from before tc actions */ 49 struct tc_police_compat 50 { 51 u32 index; 52 int action; 53 u32 limit; 54 u32 burst; 55 u32 mtu; 56 struct tc_ratespec rate; 57 struct tc_ratespec peakrate; 58 }; 59 60 /* Each policer is serialized by its individual spinlock */ 61 62 #ifdef CONFIG_NET_CLS_ACT 63 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, 64 int type, struct tc_action *a) 65 { 66 struct tcf_common *p; 67 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 68 struct rtattr *r; 69 70 read_lock(&police_lock); 71 72 s_i = cb->args[0]; 73 74 for (i = 0; i < (POL_TAB_MASK + 1); i++) { 75 p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; 76 77 for (; p; p = p->tcfc_next) { 78 index++; 79 if (index < s_i) 80 continue; 81 a->priv = p; 82 a->order = index; 83 r = (struct rtattr*) skb->tail; 84 RTA_PUT(skb, a->order, 0, NULL); 85 if (type == RTM_DELACTION) 86 err = tcf_action_dump_1(skb, a, 0, 1); 87 else 88 err = tcf_action_dump_1(skb, a, 0, 0); 89 if (err < 0) { 90 index--; 91 skb_trim(skb, (u8*)r - skb->data); 92 goto done; 93 } 94 r->rta_len = skb->tail - (u8*)r; 95 n_i++; 96 } 97 } 98 done: 99 read_unlock(&police_lock); 100 if (n_i) 101 cb->args[0] += n_i; 102 return n_i; 103 104 rtattr_failure: 105 skb_trim(skb, (u8*)r - skb->data); 106 goto done; 107 } 108 #endif 109 110 void tcf_police_destroy(struct tcf_police *p) 111 { 112 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); 113 struct tcf_common **p1p; 114 115 for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { 116 if (*p1p == &p->common) { 117 write_lock_bh(&police_lock); 118 *p1p = p->tcf_next; 119 write_unlock_bh(&police_lock); 120 #ifdef CONFIG_NET_ESTIMATOR 121 gen_kill_estimator(&p->tcf_bstats, 122 &p->tcf_rate_est); 123 #endif 124 if (p->tcfp_R_tab) 125 qdisc_put_rtab(p->tcfp_R_tab); 126 if (p->tcfp_P_tab) 127 qdisc_put_rtab(p->tcfp_P_tab); 128 kfree(p); 129 return; 130 } 131 } 132 BUG_TRAP(0); 133 } 134 135 #ifdef CONFIG_NET_CLS_ACT 136 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, 137 struct tc_action *a, int ovr, int bind) 138 { 139 unsigned h; 140 int ret = 0, err; 141 struct rtattr *tb[TCA_POLICE_MAX]; 142 struct tc_police *parm; 143 struct tcf_police *police; 144 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 145 int size; 146 147 if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 148 return -EINVAL; 149 150 if (tb[TCA_POLICE_TBF-1] == NULL) 151 return -EINVAL; 152 size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); 153 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) 154 return -EINVAL; 155 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); 156 157 if (tb[TCA_POLICE_RESULT-1] != NULL && 158 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 159 return -EINVAL; 160 if (tb[TCA_POLICE_RESULT-1] != NULL && 161 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 162 return -EINVAL; 163 164 if (parm->index) { 165 struct tcf_common *pc; 166 167 pc = tcf_hash_lookup(parm->index, &police_hash_info); 168 if (pc != NULL) { 169 a->priv = pc; 170 police = to_police(pc); 171 if (bind) { 172 police->tcf_bindcnt += 1; 173 police->tcf_refcnt += 1; 174 } 175 if (ovr) 176 goto override; 177 return ret; 178 } 179 } 180 181 police = kzalloc(sizeof(*police), GFP_KERNEL); 182 if (police == NULL) 183 return -ENOMEM; 184 ret = ACT_P_CREATED; 185 police->tcf_refcnt = 1; 186 spin_lock_init(&police->tcf_lock); 187 police->tcf_stats_lock = &police->tcf_lock; 188 if (bind) 189 police->tcf_bindcnt = 1; 190 override: 191 if (parm->rate.rate) { 192 err = -ENOMEM; 193 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 194 if (R_tab == NULL) 195 goto failure; 196 if (parm->peakrate.rate) { 197 P_tab = qdisc_get_rtab(&parm->peakrate, 198 tb[TCA_POLICE_PEAKRATE-1]); 199 if (P_tab == NULL) { 200 qdisc_put_rtab(R_tab); 201 goto failure; 202 } 203 } 204 } 205 /* No failure allowed after this point */ 206 spin_lock_bh(&police->tcf_lock); 207 if (R_tab != NULL) { 208 qdisc_put_rtab(police->tcfp_R_tab); 209 police->tcfp_R_tab = R_tab; 210 } 211 if (P_tab != NULL) { 212 qdisc_put_rtab(police->tcfp_P_tab); 213 police->tcfp_P_tab = P_tab; 214 } 215 216 if (tb[TCA_POLICE_RESULT-1]) 217 police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 218 police->tcfp_toks = police->tcfp_burst = parm->burst; 219 police->tcfp_mtu = parm->mtu; 220 if (police->tcfp_mtu == 0) { 221 police->tcfp_mtu = ~0; 222 if (police->tcfp_R_tab) 223 police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; 224 } 225 if (police->tcfp_P_tab) 226 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); 227 police->tcf_action = parm->action; 228 229 #ifdef CONFIG_NET_ESTIMATOR 230 if (tb[TCA_POLICE_AVRATE-1]) 231 police->tcfp_ewma_rate = 232 *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); 233 if (est) 234 gen_replace_estimator(&police->tcf_bstats, 235 &police->tcf_rate_est, 236 police->tcf_stats_lock, est); 237 #endif 238 239 spin_unlock_bh(&police->tcf_lock); 240 if (ret != ACT_P_CREATED) 241 return ret; 242 243 PSCHED_GET_TIME(police->tcfp_t_c); 244 police->tcf_index = parm->index ? parm->index : 245 tcf_hash_new_index(&police_idx_gen, &police_hash_info); 246 h = tcf_hash(police->tcf_index, POL_TAB_MASK); 247 write_lock_bh(&police_lock); 248 police->tcf_next = tcf_police_ht[h]; 249 tcf_police_ht[h] = &police->common; 250 write_unlock_bh(&police_lock); 251 252 a->priv = police; 253 return ret; 254 255 failure: 256 if (ret == ACT_P_CREATED) 257 kfree(police); 258 return err; 259 } 260 261 static int tcf_act_police_cleanup(struct tc_action *a, int bind) 262 { 263 struct tcf_police *p = a->priv; 264 265 if (p != NULL) 266 return tcf_police_release(p, bind); 267 return 0; 268 } 269 270 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, 271 struct tcf_result *res) 272 { 273 struct tcf_police *police = a->priv; 274 psched_time_t now; 275 long toks; 276 long ptoks = 0; 277 278 spin_lock(&police->tcf_lock); 279 280 police->tcf_bstats.bytes += skb->len; 281 police->tcf_bstats.packets++; 282 283 #ifdef CONFIG_NET_ESTIMATOR 284 if (police->tcfp_ewma_rate && 285 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { 286 police->tcf_qstats.overlimits++; 287 spin_unlock(&police->tcf_lock); 288 return police->tcf_action; 289 } 290 #endif 291 292 if (skb->len <= police->tcfp_mtu) { 293 if (police->tcfp_R_tab == NULL) { 294 spin_unlock(&police->tcf_lock); 295 return police->tcfp_result; 296 } 297 298 PSCHED_GET_TIME(now); 299 300 toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, 301 police->tcfp_burst); 302 if (police->tcfp_P_tab) { 303 ptoks = toks + police->tcfp_ptoks; 304 if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) 305 ptoks = (long)L2T_P(police, police->tcfp_mtu); 306 ptoks -= L2T_P(police, skb->len); 307 } 308 toks += police->tcfp_toks; 309 if (toks > (long)police->tcfp_burst) 310 toks = police->tcfp_burst; 311 toks -= L2T(police, skb->len); 312 if ((toks|ptoks) >= 0) { 313 police->tcfp_t_c = now; 314 police->tcfp_toks = toks; 315 police->tcfp_ptoks = ptoks; 316 spin_unlock(&police->tcf_lock); 317 return police->tcfp_result; 318 } 319 } 320 321 police->tcf_qstats.overlimits++; 322 spin_unlock(&police->tcf_lock); 323 return police->tcf_action; 324 } 325 326 static int 327 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 328 { 329 unsigned char *b = skb->tail; 330 struct tcf_police *police = a->priv; 331 struct tc_police opt; 332 333 opt.index = police->tcf_index; 334 opt.action = police->tcf_action; 335 opt.mtu = police->tcfp_mtu; 336 opt.burst = police->tcfp_burst; 337 opt.refcnt = police->tcf_refcnt - ref; 338 opt.bindcnt = police->tcf_bindcnt - bind; 339 if (police->tcfp_R_tab) 340 opt.rate = police->tcfp_R_tab->rate; 341 else 342 memset(&opt.rate, 0, sizeof(opt.rate)); 343 if (police->tcfp_P_tab) 344 opt.peakrate = police->tcfp_P_tab->rate; 345 else 346 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 347 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 348 if (police->tcfp_result) 349 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), 350 &police->tcfp_result); 351 #ifdef CONFIG_NET_ESTIMATOR 352 if (police->tcfp_ewma_rate) 353 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); 354 #endif 355 return skb->len; 356 357 rtattr_failure: 358 skb_trim(skb, b - skb->data); 359 return -1; 360 } 361 362 MODULE_AUTHOR("Alexey Kuznetsov"); 363 MODULE_DESCRIPTION("Policing actions"); 364 MODULE_LICENSE("GPL"); 365 366 static struct tc_action_ops act_police_ops = { 367 .kind = "police", 368 .hinfo = &police_hash_info, 369 .type = TCA_ID_POLICE, 370 .capab = TCA_CAP_NONE, 371 .owner = THIS_MODULE, 372 .act = tcf_act_police, 373 .dump = tcf_act_police_dump, 374 .cleanup = tcf_act_police_cleanup, 375 .lookup = tcf_hash_search, 376 .init = tcf_act_police_locate, 377 .walk = tcf_act_police_walker 378 }; 379 380 static int __init 381 police_init_module(void) 382 { 383 return tcf_register_action(&act_police_ops); 384 } 385 386 static void __exit 387 police_cleanup_module(void) 388 { 389 tcf_unregister_action(&act_police_ops); 390 } 391 392 module_init(police_init_module); 393 module_exit(police_cleanup_module); 394 395 #else /* CONFIG_NET_CLS_ACT */ 396 397 static struct tcf_common *tcf_police_lookup(u32 index) 398 { 399 struct tcf_hashinfo *hinfo = &police_hash_info; 400 struct tcf_common *p; 401 402 read_lock(hinfo->lock); 403 for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; 404 p = p->tcfc_next) { 405 if (p->tcfc_index == index) 406 break; 407 } 408 read_unlock(hinfo->lock); 409 410 return p; 411 } 412 413 static u32 tcf_police_new_index(void) 414 { 415 u32 *idx_gen = &police_idx_gen; 416 u32 val = *idx_gen; 417 418 do { 419 if (++val == 0) 420 val = 1; 421 } while (tcf_police_lookup(val)); 422 423 return (*idx_gen = val); 424 } 425 426 struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) 427 { 428 unsigned int h; 429 struct tcf_police *police; 430 struct rtattr *tb[TCA_POLICE_MAX]; 431 struct tc_police *parm; 432 int size; 433 434 if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 435 return NULL; 436 437 if (tb[TCA_POLICE_TBF-1] == NULL) 438 return NULL; 439 size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); 440 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) 441 return NULL; 442 443 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); 444 445 if (parm->index) { 446 struct tcf_common *pc; 447 448 pc = tcf_police_lookup(parm->index); 449 if (pc) { 450 police = to_police(pc); 451 police->tcf_refcnt++; 452 return police; 453 } 454 } 455 police = kzalloc(sizeof(*police), GFP_KERNEL); 456 if (unlikely(!police)) 457 return NULL; 458 459 police->tcf_refcnt = 1; 460 spin_lock_init(&police->tcf_lock); 461 police->tcf_stats_lock = &police->tcf_lock; 462 if (parm->rate.rate) { 463 police->tcfp_R_tab = 464 qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 465 if (police->tcfp_R_tab == NULL) 466 goto failure; 467 if (parm->peakrate.rate) { 468 police->tcfp_P_tab = 469 qdisc_get_rtab(&parm->peakrate, 470 tb[TCA_POLICE_PEAKRATE-1]); 471 if (police->tcfp_P_tab == NULL) 472 goto failure; 473 } 474 } 475 if (tb[TCA_POLICE_RESULT-1]) { 476 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 477 goto failure; 478 police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 479 } 480 #ifdef CONFIG_NET_ESTIMATOR 481 if (tb[TCA_POLICE_AVRATE-1]) { 482 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) 483 goto failure; 484 police->tcfp_ewma_rate = 485 *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); 486 } 487 #endif 488 police->tcfp_toks = police->tcfp_burst = parm->burst; 489 police->tcfp_mtu = parm->mtu; 490 if (police->tcfp_mtu == 0) { 491 police->tcfp_mtu = ~0; 492 if (police->tcfp_R_tab) 493 police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; 494 } 495 if (police->tcfp_P_tab) 496 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); 497 PSCHED_GET_TIME(police->tcfp_t_c); 498 police->tcf_index = parm->index ? parm->index : 499 tcf_police_new_index(); 500 police->tcf_action = parm->action; 501 #ifdef CONFIG_NET_ESTIMATOR 502 if (est) 503 gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, 504 police->tcf_stats_lock, est); 505 #endif 506 h = tcf_hash(police->tcf_index, POL_TAB_MASK); 507 write_lock_bh(&police_lock); 508 police->tcf_next = tcf_police_ht[h]; 509 tcf_police_ht[h] = &police->common; 510 write_unlock_bh(&police_lock); 511 return police; 512 513 failure: 514 if (police->tcfp_R_tab) 515 qdisc_put_rtab(police->tcfp_R_tab); 516 kfree(police); 517 return NULL; 518 } 519 520 int tcf_police(struct sk_buff *skb, struct tcf_police *police) 521 { 522 psched_time_t now; 523 long toks; 524 long ptoks = 0; 525 526 spin_lock(&police->tcf_lock); 527 528 police->tcf_bstats.bytes += skb->len; 529 police->tcf_bstats.packets++; 530 531 #ifdef CONFIG_NET_ESTIMATOR 532 if (police->tcfp_ewma_rate && 533 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { 534 police->tcf_qstats.overlimits++; 535 spin_unlock(&police->tcf_lock); 536 return police->tcf_action; 537 } 538 #endif 539 if (skb->len <= police->tcfp_mtu) { 540 if (police->tcfp_R_tab == NULL) { 541 spin_unlock(&police->tcf_lock); 542 return police->tcfp_result; 543 } 544 545 PSCHED_GET_TIME(now); 546 toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, 547 police->tcfp_burst); 548 if (police->tcfp_P_tab) { 549 ptoks = toks + police->tcfp_ptoks; 550 if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) 551 ptoks = (long)L2T_P(police, police->tcfp_mtu); 552 ptoks -= L2T_P(police, skb->len); 553 } 554 toks += police->tcfp_toks; 555 if (toks > (long)police->tcfp_burst) 556 toks = police->tcfp_burst; 557 toks -= L2T(police, skb->len); 558 if ((toks|ptoks) >= 0) { 559 police->tcfp_t_c = now; 560 police->tcfp_toks = toks; 561 police->tcfp_ptoks = ptoks; 562 spin_unlock(&police->tcf_lock); 563 return police->tcfp_result; 564 } 565 } 566 567 police->tcf_qstats.overlimits++; 568 spin_unlock(&police->tcf_lock); 569 return police->tcf_action; 570 } 571 EXPORT_SYMBOL(tcf_police); 572 573 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) 574 { 575 unsigned char *b = skb->tail; 576 struct tc_police opt; 577 578 opt.index = police->tcf_index; 579 opt.action = police->tcf_action; 580 opt.mtu = police->tcfp_mtu; 581 opt.burst = police->tcfp_burst; 582 if (police->tcfp_R_tab) 583 opt.rate = police->tcfp_R_tab->rate; 584 else 585 memset(&opt.rate, 0, sizeof(opt.rate)); 586 if (police->tcfp_P_tab) 587 opt.peakrate = police->tcfp_P_tab->rate; 588 else 589 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 590 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 591 if (police->tcfp_result) 592 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), 593 &police->tcfp_result); 594 #ifdef CONFIG_NET_ESTIMATOR 595 if (police->tcfp_ewma_rate) 596 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); 597 #endif 598 return skb->len; 599 600 rtattr_failure: 601 skb_trim(skb, b - skb->data); 602 return -1; 603 } 604 605 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) 606 { 607 struct gnet_dump d; 608 609 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, 610 TCA_XSTATS, police->tcf_stats_lock, 611 &d) < 0) 612 goto errout; 613 614 if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || 615 #ifdef CONFIG_NET_ESTIMATOR 616 gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || 617 #endif 618 gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) 619 goto errout; 620 621 if (gnet_stats_finish_copy(&d) < 0) 622 goto errout; 623 624 return 0; 625 626 errout: 627 return -1; 628 } 629 630 #endif /* CONFIG_NET_CLS_ACT */ 631