1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Codel/FQ_Codel and PIE/FQ-PIE Code: 5 * Copyright (C) 2016 Centre for Advanced Internet Architectures, 6 * Swinburne University of Technology, Melbourne, Australia. 7 * Portions of this code were made possible in part by a gift from 8 * The Comcast Innovation Fund. 9 * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> 10 * 11 * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa 12 * Portions Copyright (c) 2000 Akamba Corp. 13 * All rights reserved 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 /* 41 * Configuration and internal object management for dummynet. 42 */ 43 44 #include "opt_inet6.h" 45 46 #include <sys/param.h> 47 #include <sys/ck.h> 48 #include <sys/systm.h> 49 #include <sys/malloc.h> 50 #include <sys/mbuf.h> 51 #include <sys/kernel.h> 52 #include <sys/lock.h> 53 #include <sys/module.h> 54 #include <sys/mutex.h> 55 #include <sys/priv.h> 56 #include <sys/proc.h> 57 #include <sys/rwlock.h> 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/time.h> 61 #include <sys/taskqueue.h> 62 #include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ 63 #include <netinet/in.h> 64 #include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ 65 #include <netinet/ip_fw.h> 66 #include <netinet/ip_dummynet.h> 67 #include <net/vnet.h> 68 69 #include <netpfil/ipfw/ip_fw_private.h> 70 #include <netpfil/ipfw/dn_heap.h> 71 #include <netpfil/ipfw/ip_dn_private.h> 72 #ifdef NEW_AQM 73 #include <netpfil/ipfw/dn_aqm.h> 74 #endif 75 #include <netpfil/ipfw/dn_sched.h> 76 77 /* which objects to copy */ 78 #define DN_C_LINK 0x01 79 #define DN_C_SCH 0x02 80 #define DN_C_FLOW 0x04 81 #define DN_C_FS 0x08 82 #define DN_C_QUEUE 0x10 83 84 /* we use this argument in case of a schk_new */ 85 struct schk_new_arg { 86 struct dn_alg *fp; 87 struct dn_sch *sch; 88 }; 89 90 /*---- callout hooks. ----*/ 91 static struct callout dn_timeout; 92 static int dn_tasks_started = 0; 93 static int dn_gone; 94 static struct task dn_task; 95 static struct taskqueue *dn_tq = NULL; 96 97 /* global scheduler list */ 98 struct mtx sched_mtx; 99 CK_LIST_HEAD(, dn_alg) schedlist; 100 #ifdef NEW_AQM 101 CK_LIST_HEAD(, dn_aqm) aqmlist; /* list of AQMs */ 102 #endif 103 104 static void 105 dummynet(void *arg) 106 { 107 108 (void)arg; /* UNUSED */ 109 taskqueue_enqueue(dn_tq, &dn_task); 110 } 111 112 void 113 dummynet_sched_lock(void) 114 { 115 mtx_lock(&sched_mtx); 116 } 117 118 void 119 dummynet_sched_unlock(void) 120 { 121 mtx_unlock(&sched_mtx); 122 } 123 124 void 125 dn_reschedule(void) 126 { 127 128 if (dn_gone != 0) 129 return; 130 callout_reset_sbt(&dn_timeout, tick_sbt, 0, dummynet, NULL, 131 C_HARDCLOCK | C_DIRECT_EXEC); 132 } 133 /*----- end of callout hooks -----*/ 134 135 #ifdef NEW_AQM 136 /* Return AQM descriptor for given type or name. */ 137 static struct dn_aqm * 138 find_aqm_type(int type, char *name) 139 { 140 struct dn_aqm *d; 141 142 NET_EPOCH_ASSERT(); 143 144 CK_LIST_FOREACH(d, &aqmlist, next) { 145 if (d->type == type || (name && !strcasecmp(d->name, name))) 146 return d; 147 } 148 return NULL; /* not found */ 149 } 150 #endif 151 152 /* Return a scheduler descriptor given the type or name. */ 153 static struct dn_alg * 154 find_sched_type(int type, char *name) 155 { 156 struct dn_alg *d; 157 158 NET_EPOCH_ASSERT(); 159 160 CK_LIST_FOREACH(d, &schedlist, next) { 161 if (d->type == type || (name && !strcasecmp(d->name, name))) 162 return d; 163 } 164 return NULL; /* not found */ 165 } 166 167 int 168 ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) 169 { 170 int oldv = *v; 171 const char *op = NULL; 172 if (dflt < lo) 173 dflt = lo; 174 if (dflt > hi) 175 dflt = hi; 176 if (oldv < lo) { 177 *v = dflt; 178 op = "Bump"; 179 } else if (oldv > hi) { 180 *v = hi; 181 op = "Clamp"; 182 } else 183 return *v; 184 if (op && msg && bootverbose) 185 printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); 186 return *v; 187 } 188 189 /*---- flow_id mask, hash and compare functions ---*/ 190 /* 191 * The flow_id includes the 5-tuple, the queue/pipe number 192 * which we store in the extra area in host order, 193 * and for ipv6 also the flow_id6. 194 * XXX see if we want the tos byte (can store in 'flags') 195 */ 196 static struct ipfw_flow_id * 197 flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) 198 { 199 int is_v6 = IS_IP6_FLOW_ID(id); 200 201 id->dst_port &= mask->dst_port; 202 id->src_port &= mask->src_port; 203 id->proto &= mask->proto; 204 id->extra &= mask->extra; 205 if (is_v6) { 206 APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); 207 APPLY_MASK(&id->src_ip6, &mask->src_ip6); 208 id->flow_id6 &= mask->flow_id6; 209 } else { 210 id->dst_ip &= mask->dst_ip; 211 id->src_ip &= mask->src_ip; 212 } 213 return id; 214 } 215 216 /* computes an OR of two masks, result in dst and also returned */ 217 static struct ipfw_flow_id * 218 flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst) 219 { 220 int is_v6 = IS_IP6_FLOW_ID(dst); 221 222 dst->dst_port |= src->dst_port; 223 dst->src_port |= src->src_port; 224 dst->proto |= src->proto; 225 dst->extra |= src->extra; 226 if (is_v6) { 227 #define OR_MASK(_d, _s) \ 228 (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \ 229 (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \ 230 (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \ 231 (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3]; 232 OR_MASK(&dst->dst_ip6, &src->dst_ip6); 233 OR_MASK(&dst->src_ip6, &src->src_ip6); 234 #undef OR_MASK 235 dst->flow_id6 |= src->flow_id6; 236 } else { 237 dst->dst_ip |= src->dst_ip; 238 dst->src_ip |= src->src_ip; 239 } 240 return dst; 241 } 242 243 static int 244 nonzero_mask(struct ipfw_flow_id *m) 245 { 246 if (m->dst_port || m->src_port || m->proto || m->extra) 247 return 1; 248 if (IS_IP6_FLOW_ID(m)) { 249 return 250 m->dst_ip6.__u6_addr.__u6_addr32[0] || 251 m->dst_ip6.__u6_addr.__u6_addr32[1] || 252 m->dst_ip6.__u6_addr.__u6_addr32[2] || 253 m->dst_ip6.__u6_addr.__u6_addr32[3] || 254 m->src_ip6.__u6_addr.__u6_addr32[0] || 255 m->src_ip6.__u6_addr.__u6_addr32[1] || 256 m->src_ip6.__u6_addr.__u6_addr32[2] || 257 m->src_ip6.__u6_addr.__u6_addr32[3] || 258 m->flow_id6; 259 } else { 260 return m->dst_ip || m->src_ip; 261 } 262 } 263 264 /* XXX we may want a better hash function */ 265 static uint32_t 266 flow_id_hash(struct ipfw_flow_id *id) 267 { 268 uint32_t i; 269 270 if (IS_IP6_FLOW_ID(id)) { 271 uint32_t *d = (uint32_t *)&id->dst_ip6; 272 uint32_t *s = (uint32_t *)&id->src_ip6; 273 i = (d[0] ) ^ (d[1]) ^ 274 (d[2] ) ^ (d[3]) ^ 275 (d[0] >> 15) ^ (d[1] >> 15) ^ 276 (d[2] >> 15) ^ (d[3] >> 15) ^ 277 (s[0] << 1) ^ (s[1] << 1) ^ 278 (s[2] << 1) ^ (s[3] << 1) ^ 279 (s[0] << 16) ^ (s[1] << 16) ^ 280 (s[2] << 16) ^ (s[3] << 16) ^ 281 (id->dst_port << 1) ^ (id->src_port) ^ 282 (id->extra) ^ 283 (id->proto ) ^ (id->flow_id6); 284 } else { 285 i = (id->dst_ip) ^ (id->dst_ip >> 15) ^ 286 (id->src_ip << 1) ^ (id->src_ip >> 16) ^ 287 (id->extra) ^ 288 (id->dst_port << 1) ^ (id->src_port) ^ (id->proto); 289 } 290 return i; 291 } 292 293 /* Like bcmp, returns 0 if ids match, 1 otherwise. */ 294 static int 295 flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2) 296 { 297 int is_v6 = IS_IP6_FLOW_ID(id1); 298 299 if (!is_v6) { 300 if (IS_IP6_FLOW_ID(id2)) 301 return 1; /* different address families */ 302 303 return (id1->dst_ip == id2->dst_ip && 304 id1->src_ip == id2->src_ip && 305 id1->dst_port == id2->dst_port && 306 id1->src_port == id2->src_port && 307 id1->proto == id2->proto && 308 id1->extra == id2->extra) ? 0 : 1; 309 } 310 /* the ipv6 case */ 311 return ( 312 !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) && 313 !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) && 314 id1->dst_port == id2->dst_port && 315 id1->src_port == id2->src_port && 316 id1->proto == id2->proto && 317 id1->extra == id2->extra && 318 id1->flow_id6 == id2->flow_id6) ? 0 : 1; 319 } 320 /*--------- end of flow-id mask, hash and compare ---------*/ 321 322 /*--- support functions for the qht hashtable ---- 323 * Entries are hashed by flow-id 324 */ 325 static uint32_t 326 q_hash(uintptr_t key, int flags, void *arg) 327 { 328 /* compute the hash slot from the flow id */ 329 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 330 &((struct dn_queue *)key)->ni.fid : 331 (struct ipfw_flow_id *)key; 332 333 return flow_id_hash(id); 334 } 335 336 static int 337 q_match(void *obj, uintptr_t key, int flags, void *arg) 338 { 339 struct dn_queue *o = (struct dn_queue *)obj; 340 struct ipfw_flow_id *id2; 341 342 if (flags & DNHT_KEY_IS_OBJ) { 343 /* compare pointers */ 344 id2 = &((struct dn_queue *)key)->ni.fid; 345 } else { 346 id2 = (struct ipfw_flow_id *)key; 347 } 348 return (0 == flow_id_cmp(&o->ni.fid, id2)); 349 } 350 351 /* 352 * create a new queue instance for the given 'key'. 353 */ 354 static void * 355 q_new(uintptr_t key, int flags, void *arg) 356 { 357 struct dn_queue *q, *template = arg; 358 struct dn_fsk *fs = template->fs; 359 int size = sizeof(*q) + fs->sched->fp->q_datalen; 360 361 q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO); 362 if (q == NULL) { 363 D("no memory for new queue"); 364 return NULL; 365 } 366 367 set_oid(&q->ni.oid, DN_QUEUE, size); 368 if (fs->fs.flags & DN_QHT_HASH) 369 q->ni.fid = *(struct ipfw_flow_id *)key; 370 q->fs = fs; 371 q->_si = template->_si; 372 q->_si->q_count++; 373 374 if (fs->sched->fp->new_queue) 375 fs->sched->fp->new_queue(q); 376 377 #ifdef NEW_AQM 378 /* call AQM init function after creating a queue*/ 379 if (fs->aqmfp && fs->aqmfp->init) 380 if(fs->aqmfp->init(q)) 381 D("unable to init AQM for fs %d", fs->fs.fs_nr); 382 #endif 383 V_dn_cfg.queue_count++; 384 385 return q; 386 } 387 388 /* 389 * Notify schedulers that a queue is going away. 390 * If (flags & DN_DESTROY), also free the packets. 391 * The version for callbacks is called q_delete_cb(). 392 */ 393 static void 394 dn_delete_queue(struct dn_queue *q, int flags) 395 { 396 struct dn_fsk *fs = q->fs; 397 398 #ifdef NEW_AQM 399 /* clean up AQM status for queue 'q' 400 * cleanup here is called just with MULTIQUEUE 401 */ 402 if (fs && fs->aqmfp && fs->aqmfp->cleanup) 403 fs->aqmfp->cleanup(q); 404 #endif 405 // D("fs %p si %p\n", fs, q->_si); 406 /* notify the parent scheduler that the queue is going away */ 407 if (fs && fs->sched->fp->free_queue) 408 fs->sched->fp->free_queue(q); 409 q->_si->q_count--; 410 q->_si = NULL; 411 if (flags & DN_DESTROY) { 412 if (q->mq.head) 413 dn_free_pkts(q->mq.head); 414 bzero(q, sizeof(*q)); // safety 415 free(q, M_DUMMYNET); 416 V_dn_cfg.queue_count--; 417 } 418 } 419 420 static int 421 q_delete_cb(void *q, void *arg) 422 { 423 int flags = (int)(uintptr_t)arg; 424 dn_delete_queue(q, flags); 425 return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0; 426 } 427 428 /* 429 * calls dn_delete_queue/q_delete_cb on all queues, 430 * which notifies the parent scheduler and possibly drains packets. 431 * flags & DN_DESTROY: drains queues and destroy qht; 432 */ 433 static void 434 qht_delete(struct dn_fsk *fs, int flags) 435 { 436 ND("fs %d start flags %d qht %p", 437 fs->fs.fs_nr, flags, fs->qht); 438 if (!fs->qht) 439 return; 440 if (fs->fs.flags & DN_QHT_HASH) { 441 dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags); 442 if (flags & DN_DESTROY) { 443 dn_ht_free(fs->qht, 0); 444 fs->qht = NULL; 445 } 446 } else { 447 dn_delete_queue((struct dn_queue *)(fs->qht), flags); 448 if (flags & DN_DESTROY) 449 fs->qht = NULL; 450 } 451 } 452 453 /* 454 * Find and possibly create the queue for a MULTIQUEUE scheduler. 455 * We never call it for !MULTIQUEUE (the queue is in the sch_inst). 456 */ 457 struct dn_queue * 458 ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si, 459 struct ipfw_flow_id *id) 460 { 461 struct dn_queue template; 462 463 template._si = si; 464 template.fs = fs; 465 466 if (fs->fs.flags & DN_QHT_HASH) { 467 struct ipfw_flow_id masked_id; 468 if (fs->qht == NULL) { 469 fs->qht = dn_ht_init(NULL, fs->fs.buckets, 470 offsetof(struct dn_queue, q_next), 471 q_hash, q_match, q_new); 472 if (fs->qht == NULL) 473 return NULL; 474 } 475 masked_id = *id; 476 flow_id_mask(&fs->fsk_mask, &masked_id); 477 return dn_ht_find(fs->qht, (uintptr_t)&masked_id, 478 DNHT_INSERT, &template); 479 } else { 480 if (fs->qht == NULL) 481 fs->qht = q_new(0, 0, &template); 482 return (struct dn_queue *)fs->qht; 483 } 484 } 485 /*--- end of queue hash table ---*/ 486 487 /*--- support functions for the sch_inst hashtable ---- 488 * 489 * These are hashed by flow-id 490 */ 491 static uint32_t 492 si_hash(uintptr_t key, int flags, void *arg) 493 { 494 /* compute the hash slot from the flow id */ 495 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 496 &((struct dn_sch_inst *)key)->ni.fid : 497 (struct ipfw_flow_id *)key; 498 499 return flow_id_hash(id); 500 } 501 502 static int 503 si_match(void *obj, uintptr_t key, int flags, void *arg) 504 { 505 struct dn_sch_inst *o = obj; 506 struct ipfw_flow_id *id2; 507 508 id2 = (flags & DNHT_KEY_IS_OBJ) ? 509 &((struct dn_sch_inst *)key)->ni.fid : 510 (struct ipfw_flow_id *)key; 511 return flow_id_cmp(&o->ni.fid, id2) == 0; 512 } 513 514 /* 515 * create a new instance for the given 'key' 516 * Allocate memory for instance, delay line and scheduler private data. 517 */ 518 static void * 519 si_new(uintptr_t key, int flags, void *arg) 520 { 521 struct dn_schk *s = arg; 522 struct dn_sch_inst *si; 523 int l = sizeof(*si) + s->fp->si_datalen; 524 525 si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); 526 if (si == NULL) 527 goto error; 528 529 /* Set length only for the part passed up to userland. */ 530 set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow)); 531 set_oid(&(si->dline.oid), DN_DELAY_LINE, 532 sizeof(struct delay_line)); 533 /* mark si and dline as outside the event queue */ 534 si->ni.oid.id = si->dline.oid.id = -1; 535 536 si->sched = s; 537 si->dline.si = si; 538 539 if (s->fp->new_sched && s->fp->new_sched(si)) { 540 D("new_sched error"); 541 goto error; 542 } 543 if (s->sch.flags & DN_HAVE_MASK) 544 si->ni.fid = *(struct ipfw_flow_id *)key; 545 546 #ifdef NEW_AQM 547 /* init AQM status for !DN_MULTIQUEUE sched*/ 548 if (!(s->fp->flags & DN_MULTIQUEUE)) 549 if (s->fs->aqmfp && s->fs->aqmfp->init) 550 if(s->fs->aqmfp->init((struct dn_queue *)(si + 1))) { 551 D("unable to init AQM for fs %d", s->fs->fs.fs_nr); 552 goto error; 553 } 554 #endif 555 556 V_dn_cfg.si_count++; 557 return si; 558 559 error: 560 if (si) { 561 bzero(si, sizeof(*si)); // safety 562 free(si, M_DUMMYNET); 563 } 564 return NULL; 565 } 566 567 /* 568 * Callback from siht to delete all scheduler instances. Remove 569 * si and delay line from the system heap, destroy all queues. 570 * We assume that all flowset have been notified and do not 571 * point to us anymore. 572 */ 573 static int 574 si_destroy(void *_si, void *arg) 575 { 576 struct dn_sch_inst *si = _si; 577 struct dn_schk *s = si->sched; 578 struct delay_line *dl = &si->dline; 579 580 if (dl->oid.subtype) /* remove delay line from event heap */ 581 heap_extract(&V_dn_cfg.evheap, dl); 582 dn_free_pkts(dl->mq.head); /* drain delay line */ 583 if (si->kflags & DN_ACTIVE) /* remove si from event heap */ 584 heap_extract(&V_dn_cfg.evheap, si); 585 586 #ifdef NEW_AQM 587 /* clean up AQM status for !DN_MULTIQUEUE sched 588 * Note that all queues belong to fs were cleaned up in fsk_detach. 589 * When drain_scheduler is called s->fs and q->fs are pointing 590 * to a correct fs, so we can use fs in this case. 591 */ 592 if (!(s->fp->flags & DN_MULTIQUEUE)) { 593 struct dn_queue *q = (struct dn_queue *)(si + 1); 594 if (q->aqm_status && q->fs->aqmfp) 595 if (q->fs->aqmfp->cleanup) 596 q->fs->aqmfp->cleanup(q); 597 } 598 #endif 599 if (s->fp->free_sched) 600 s->fp->free_sched(si); 601 bzero(si, sizeof(*si)); /* safety */ 602 free(si, M_DUMMYNET); 603 V_dn_cfg.si_count--; 604 return DNHT_SCAN_DEL; 605 } 606 607 /* 608 * Find the scheduler instance for this packet. If we need to apply 609 * a mask, do on a local copy of the flow_id to preserve the original. 610 * Assume siht is always initialized if we have a mask. 611 */ 612 struct dn_sch_inst * 613 ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id) 614 { 615 616 if (s->sch.flags & DN_HAVE_MASK) { 617 struct ipfw_flow_id id_t = *id; 618 flow_id_mask(&s->sch.sched_mask, &id_t); 619 return dn_ht_find(s->siht, (uintptr_t)&id_t, 620 DNHT_INSERT, s); 621 } 622 if (!s->siht) 623 s->siht = si_new(0, 0, s); 624 return (struct dn_sch_inst *)s->siht; 625 } 626 627 /* callback to flush credit for the scheduler instance */ 628 static int 629 si_reset_credit(void *_si, void *arg) 630 { 631 struct dn_sch_inst *si = _si; 632 struct dn_link *p = &si->sched->link; 633 634 si->credit = p->burst + (V_dn_cfg.io_fast ? p->bandwidth : 0); 635 return 0; 636 } 637 638 static void 639 schk_reset_credit(struct dn_schk *s) 640 { 641 if (s->sch.flags & DN_HAVE_MASK) 642 dn_ht_scan(s->siht, si_reset_credit, NULL); 643 else if (s->siht) 644 si_reset_credit(s->siht, NULL); 645 } 646 /*---- end of sch_inst hashtable ---------------------*/ 647 648 /*------------------------------------------------------- 649 * flowset hash (fshash) support. Entries are hashed by fs_nr. 650 * New allocations are put in the fsunlinked list, from which 651 * they are removed when they point to a specific scheduler. 652 */ 653 static uint32_t 654 fsk_hash(uintptr_t key, int flags, void *arg) 655 { 656 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 657 ((struct dn_fsk *)key)->fs.fs_nr; 658 659 return ( (i>>8)^(i>>4)^i ); 660 } 661 662 static int 663 fsk_match(void *obj, uintptr_t key, int flags, void *arg) 664 { 665 struct dn_fsk *fs = obj; 666 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 667 ((struct dn_fsk *)key)->fs.fs_nr; 668 669 return (fs->fs.fs_nr == i); 670 } 671 672 static void * 673 fsk_new(uintptr_t key, int flags, void *arg) 674 { 675 struct dn_fsk *fs; 676 677 fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO); 678 if (fs) { 679 set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs)); 680 V_dn_cfg.fsk_count++; 681 fs->drain_bucket = 0; 682 SLIST_INSERT_HEAD(&V_dn_cfg.fsu, fs, sch_chain); 683 } 684 return fs; 685 } 686 687 #ifdef NEW_AQM 688 /* callback function for cleaning up AQM queue status belongs to a flowset 689 * connected to scheduler instance '_si' (for !DN_MULTIQUEUE only). 690 */ 691 static int 692 si_cleanup_q(void *_si, void *arg) 693 { 694 struct dn_sch_inst *si = _si; 695 696 if (!(si->sched->fp->flags & DN_MULTIQUEUE)) { 697 if (si->sched->fs->aqmfp && si->sched->fs->aqmfp->cleanup) 698 si->sched->fs->aqmfp->cleanup((struct dn_queue *) (si+1)); 699 } 700 return 0; 701 } 702 703 /* callback to clean up queue AQM status.*/ 704 static int 705 q_cleanup_q(void *_q, void *arg) 706 { 707 struct dn_queue *q = _q; 708 q->fs->aqmfp->cleanup(q); 709 return 0; 710 } 711 712 /* Clean up all AQM queues status belongs to flowset 'fs' and then 713 * deconfig AQM for flowset 'fs' 714 */ 715 static void 716 aqm_cleanup_deconfig_fs(struct dn_fsk *fs) 717 { 718 struct dn_sch_inst *si; 719 720 /* clean up AQM status for all queues for !DN_MULTIQUEUE sched*/ 721 if (fs->fs.fs_nr > DN_MAX_ID) { 722 if (fs->sched && !(fs->sched->fp->flags & DN_MULTIQUEUE)) { 723 if (fs->sched->sch.flags & DN_HAVE_MASK) 724 dn_ht_scan(fs->sched->siht, si_cleanup_q, NULL); 725 else { 726 /* single si i.e. no sched mask */ 727 si = (struct dn_sch_inst *) fs->sched->siht; 728 if (si && fs->aqmfp && fs->aqmfp->cleanup) 729 fs->aqmfp->cleanup((struct dn_queue *) (si+1)); 730 } 731 } 732 } 733 734 /* clean up AQM status for all queues for DN_MULTIQUEUE sched*/ 735 if (fs->sched && fs->sched->fp->flags & DN_MULTIQUEUE && fs->qht) { 736 if (fs->fs.flags & DN_QHT_HASH) 737 dn_ht_scan(fs->qht, q_cleanup_q, NULL); 738 else 739 fs->aqmfp->cleanup((struct dn_queue *)(fs->qht)); 740 } 741 742 /* deconfig AQM */ 743 if(fs->aqmcfg && fs->aqmfp && fs->aqmfp->deconfig) 744 fs->aqmfp->deconfig(fs); 745 } 746 #endif 747 748 /* 749 * detach flowset from its current scheduler. Flags as follows: 750 * DN_DETACH removes from the fsk_list 751 * DN_DESTROY deletes individual queues 752 * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked). 753 */ 754 static void 755 fsk_detach(struct dn_fsk *fs, int flags) 756 { 757 if (flags & DN_DELETE_FS) 758 flags |= DN_DESTROY; 759 ND("fs %d from sched %d flags %s %s %s", 760 fs->fs.fs_nr, fs->fs.sched_nr, 761 (flags & DN_DELETE_FS) ? "DEL_FS":"", 762 (flags & DN_DESTROY) ? "DEL":"", 763 (flags & DN_DETACH) ? "DET":""); 764 if (flags & DN_DETACH) { /* detach from the list */ 765 struct dn_fsk_head *h; 766 h = fs->sched ? &fs->sched->fsk_list : &V_dn_cfg.fsu; 767 SLIST_REMOVE(h, fs, dn_fsk, sch_chain); 768 } 769 /* Free the RED parameters, they will be recomputed on 770 * subsequent attach if needed. 771 */ 772 free(fs->w_q_lookup, M_DUMMYNET); 773 fs->w_q_lookup = NULL; 774 qht_delete(fs, flags); 775 #ifdef NEW_AQM 776 aqm_cleanup_deconfig_fs(fs); 777 #endif 778 779 if (fs->sched && fs->sched->fp->free_fsk) 780 fs->sched->fp->free_fsk(fs); 781 fs->sched = NULL; 782 if (flags & DN_DELETE_FS) { 783 bzero(fs, sizeof(*fs)); /* safety */ 784 free(fs, M_DUMMYNET); 785 V_dn_cfg.fsk_count--; 786 } else { 787 SLIST_INSERT_HEAD(&V_dn_cfg.fsu, fs, sch_chain); 788 } 789 } 790 791 /* 792 * Detach or destroy all flowsets in a list. 793 * flags specifies what to do: 794 * DN_DESTROY: flush all queues 795 * DN_DELETE_FS: DN_DESTROY + destroy flowset 796 * DN_DELETE_FS implies DN_DESTROY 797 */ 798 static void 799 fsk_detach_list(struct dn_fsk_head *h, int flags) 800 { 801 struct dn_fsk *fs; 802 int n __unused = 0; /* only for stats */ 803 804 ND("head %p flags %x", h, flags); 805 while ((fs = SLIST_FIRST(h))) { 806 SLIST_REMOVE_HEAD(h, sch_chain); 807 n++; 808 fsk_detach(fs, flags); 809 } 810 ND("done %d flowsets", n); 811 } 812 813 /* 814 * called on 'queue X delete' -- removes the flowset from fshash, 815 * deletes all queues for the flowset, and removes the flowset. 816 */ 817 static int 818 delete_fs(int i, int locked) 819 { 820 struct dn_fsk *fs; 821 int err = 0; 822 823 if (!locked) 824 DN_BH_WLOCK(); 825 fs = dn_ht_find(V_dn_cfg.fshash, i, DNHT_REMOVE, NULL); 826 ND("fs %d found %p", i, fs); 827 if (fs) { 828 fsk_detach(fs, DN_DETACH | DN_DELETE_FS); 829 err = 0; 830 } else 831 err = EINVAL; 832 if (!locked) 833 DN_BH_WUNLOCK(); 834 return err; 835 } 836 837 /*----- end of flowset hashtable support -------------*/ 838 839 /*------------------------------------------------------------ 840 * Scheduler hash. When searching by index we pass sched_nr, 841 * otherwise we pass struct dn_sch * which is the first field in 842 * struct dn_schk so we can cast between the two. We use this trick 843 * because in the create phase (but it should be fixed). 844 */ 845 static uint32_t 846 schk_hash(uintptr_t key, int flags, void *_arg) 847 { 848 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 849 ((struct dn_schk *)key)->sch.sched_nr; 850 return ( (i>>8)^(i>>4)^i ); 851 } 852 853 static int 854 schk_match(void *obj, uintptr_t key, int flags, void *_arg) 855 { 856 struct dn_schk *s = (struct dn_schk *)obj; 857 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 858 ((struct dn_schk *)key)->sch.sched_nr; 859 return (s->sch.sched_nr == i); 860 } 861 862 /* 863 * Create the entry and intialize with the sched hash if needed. 864 * Leave s->fp unset so we can tell whether a dn_ht_find() returns 865 * a new object or a previously existing one. 866 */ 867 static void * 868 schk_new(uintptr_t key, int flags, void *arg) 869 { 870 struct schk_new_arg *a = arg; 871 struct dn_schk *s; 872 int l = sizeof(*s) +a->fp->schk_datalen; 873 874 s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); 875 if (s == NULL) 876 return NULL; 877 set_oid(&s->link.oid, DN_LINK, sizeof(s->link)); 878 s->sch = *a->sch; // copy initial values 879 s->link.link_nr = s->sch.sched_nr; 880 SLIST_INIT(&s->fsk_list); 881 /* initialize the hash table or create the single instance */ 882 s->fp = a->fp; /* si_new needs this */ 883 s->drain_bucket = 0; 884 if (s->sch.flags & DN_HAVE_MASK) { 885 s->siht = dn_ht_init(NULL, s->sch.buckets, 886 offsetof(struct dn_sch_inst, si_next), 887 si_hash, si_match, si_new); 888 if (s->siht == NULL) { 889 free(s, M_DUMMYNET); 890 return NULL; 891 } 892 } 893 s->fp = NULL; /* mark as a new scheduler */ 894 V_dn_cfg.schk_count++; 895 return s; 896 } 897 898 /* 899 * Callback for sched delete. Notify all attached flowsets to 900 * detach from the scheduler, destroy the internal flowset, and 901 * all instances. The scheduler goes away too. 902 * arg is 0 (only detach flowsets and destroy instances) 903 * DN_DESTROY (detach & delete queues, delete schk) 904 * or DN_DELETE_FS (delete queues and flowsets, delete schk) 905 */ 906 static int 907 schk_delete_cb(void *obj, void *arg) 908 { 909 struct dn_schk *s = obj; 910 #if 0 911 int a = (int)arg; 912 ND("sched %d arg %s%s", 913 s->sch.sched_nr, 914 a&DN_DESTROY ? "DEL ":"", 915 a&DN_DELETE_FS ? "DEL_FS":""); 916 #endif 917 fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0); 918 /* no more flowset pointing to us now */ 919 if (s->sch.flags & DN_HAVE_MASK) { 920 dn_ht_scan(s->siht, si_destroy, NULL); 921 dn_ht_free(s->siht, 0); 922 } else if (s->siht) 923 si_destroy(s->siht, NULL); 924 925 free(s->profile, M_DUMMYNET); 926 s->profile = NULL; 927 s->siht = NULL; 928 if (s->fp->destroy) 929 s->fp->destroy(s); 930 bzero(s, sizeof(*s)); // safety 931 free(obj, M_DUMMYNET); 932 V_dn_cfg.schk_count--; 933 return DNHT_SCAN_DEL; 934 } 935 936 /* 937 * called on a 'sched X delete' command. Deletes a single scheduler. 938 * This is done by removing from the schedhash, unlinking all 939 * flowsets and deleting their traffic. 940 */ 941 static int 942 delete_schk(int i) 943 { 944 struct dn_schk *s; 945 946 s = dn_ht_find(V_dn_cfg.schedhash, i, DNHT_REMOVE, NULL); 947 ND("%d %p", i, s); 948 if (!s) 949 return EINVAL; 950 delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */ 951 /* then detach flowsets, delete traffic */ 952 schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY); 953 return 0; 954 } 955 /*--- end of schk hashtable support ---*/ 956 957 static int 958 copy_obj(char **start, char *end, void *_o, const char *msg, int i) 959 { 960 struct dn_id o; 961 union { 962 struct dn_link l; 963 struct dn_schk s; 964 } dn; 965 int have = end - *start; 966 967 memcpy(&o, _o, sizeof(o)); 968 if (have < o.len || o.len == 0 || o.type == 0) { 969 D("(WARN) type %d %s %d have %d need %d", 970 o.type, msg, i, have, o.len); 971 return 1; 972 } 973 ND("type %d %s %d len %d", o.type, msg, i, o.len); 974 if (o.type == DN_LINK) { 975 memcpy(&dn.l, _o, sizeof(dn.l)); 976 /* Adjust burst parameter for link */ 977 dn.l.burst = div64(dn.l.burst, 8 * hz); 978 dn.l.delay = dn.l.delay * 1000 / hz; 979 memcpy(*start, &dn.l, sizeof(dn.l)); 980 } else if (o.type == DN_SCH) { 981 /* Set dn.s.sch.oid.id to the number of instances */ 982 memcpy(&dn.s, _o, sizeof(dn.s)); 983 dn.s.sch.oid.id = (dn.s.sch.flags & DN_HAVE_MASK) ? 984 dn_ht_entries(dn.s.siht) : (dn.s.siht ? 1 : 0); 985 memcpy(*start, &dn.s, sizeof(dn.s)); 986 } else 987 memcpy(*start, _o, o.len); 988 *start += o.len; 989 return 0; 990 } 991 992 /* Specific function to copy a queue. 993 * Copies only the user-visible part of a queue (which is in 994 * a struct dn_flow), and sets len accordingly. 995 */ 996 static int 997 copy_obj_q(char **start, char *end, void *_o, const char *msg, int i) 998 { 999 struct dn_id *o = _o; 1000 int have = end - *start; 1001 int len = sizeof(struct dn_flow); /* see above comment */ 1002 1003 if (have < len || o->len == 0 || o->type != DN_QUEUE) { 1004 D("ERROR type %d %s %d have %d need %d", 1005 o->type, msg, i, have, len); 1006 return 1; 1007 } 1008 ND("type %d %s %d len %d", o->type, msg, i, len); 1009 memcpy(*start, _o, len); 1010 ((struct dn_id*)(*start))->len = len; 1011 *start += len; 1012 return 0; 1013 } 1014 1015 static int 1016 copy_q_cb(void *obj, void *arg) 1017 { 1018 struct dn_queue *q = obj; 1019 struct copy_args *a = arg; 1020 struct dn_flow *ni = (struct dn_flow *)(*a->start); 1021 if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1)) 1022 return DNHT_SCAN_END; 1023 ni->oid.type = DN_FLOW; /* override the DN_QUEUE */ 1024 ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL); 1025 return 0; 1026 } 1027 1028 static int 1029 copy_q(struct copy_args *a, struct dn_fsk *fs, int flags) 1030 { 1031 if (!fs->qht) 1032 return 0; 1033 if (fs->fs.flags & DN_QHT_HASH) 1034 dn_ht_scan(fs->qht, copy_q_cb, a); 1035 else 1036 copy_q_cb(fs->qht, a); 1037 return 0; 1038 } 1039 1040 /* 1041 * This routine only copies the initial part of a profile ? XXX 1042 */ 1043 static int 1044 copy_profile(struct copy_args *a, struct dn_profile *p) 1045 { 1046 int have = a->end - *a->start; 1047 /* XXX here we check for max length */ 1048 int profile_len = sizeof(struct dn_profile) - 1049 ED_MAX_SAMPLES_NO*sizeof(int); 1050 1051 if (p == NULL) 1052 return 0; 1053 if (have < profile_len) { 1054 D("error have %d need %d", have, profile_len); 1055 return 1; 1056 } 1057 memcpy(*a->start, p, profile_len); 1058 ((struct dn_id *)(*a->start))->len = profile_len; 1059 *a->start += profile_len; 1060 return 0; 1061 } 1062 1063 static int 1064 copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags) 1065 { 1066 struct dn_fs *ufs = (struct dn_fs *)(*a->start); 1067 if (!fs) 1068 return 0; 1069 ND("flowset %d", fs->fs.fs_nr); 1070 if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr)) 1071 return DNHT_SCAN_END; 1072 ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ? 1073 dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0); 1074 if (flags) { /* copy queues */ 1075 copy_q(a, fs, 0); 1076 } 1077 return 0; 1078 } 1079 1080 static int 1081 copy_si_cb(void *obj, void *arg) 1082 { 1083 struct dn_sch_inst *si = obj; 1084 struct copy_args *a = arg; 1085 struct dn_flow *ni = (struct dn_flow *)(*a->start); 1086 if (copy_obj(a->start, a->end, &si->ni, "inst", 1087 si->sched->sch.sched_nr)) 1088 return DNHT_SCAN_END; 1089 ni->oid.type = DN_FLOW; /* override the DN_SCH_I */ 1090 ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL); 1091 return 0; 1092 } 1093 1094 static int 1095 copy_si(struct copy_args *a, struct dn_schk *s, int flags) 1096 { 1097 if (s->sch.flags & DN_HAVE_MASK) 1098 dn_ht_scan(s->siht, copy_si_cb, a); 1099 else if (s->siht) 1100 copy_si_cb(s->siht, a); 1101 return 0; 1102 } 1103 1104 /* 1105 * compute a list of children of a scheduler and copy up 1106 */ 1107 static int 1108 copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags) 1109 { 1110 struct dn_fsk *fs; 1111 struct dn_id *o; 1112 uint32_t *p; 1113 1114 int n = 0, space = sizeof(*o); 1115 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 1116 if (fs->fs.fs_nr < DN_MAX_ID) 1117 n++; 1118 } 1119 space += n * sizeof(uint32_t); 1120 DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n); 1121 if (a->end - *(a->start) < space) 1122 return DNHT_SCAN_END; 1123 o = (struct dn_id *)(*(a->start)); 1124 o->len = space; 1125 *a->start += o->len; 1126 o->type = DN_TEXT; 1127 p = (uint32_t *)(o+1); 1128 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) 1129 if (fs->fs.fs_nr < DN_MAX_ID) 1130 *p++ = fs->fs.fs_nr; 1131 return 0; 1132 } 1133 1134 static int 1135 copy_data_helper(void *_o, void *_arg) 1136 { 1137 struct copy_args *a = _arg; 1138 uint32_t *r = a->extra->r; /* start of first range */ 1139 uint32_t *lim; /* first invalid pointer */ 1140 int n; 1141 1142 lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len); 1143 1144 if (a->type == DN_LINK || a->type == DN_SCH) { 1145 /* pipe|sched show, we receive a dn_schk */ 1146 struct dn_schk *s = _o; 1147 1148 n = s->sch.sched_nr; 1149 if (a->type == DN_SCH && n >= DN_MAX_ID) 1150 return 0; /* not a scheduler */ 1151 if (a->type == DN_LINK && n <= DN_MAX_ID) 1152 return 0; /* not a pipe */ 1153 1154 /* see if the object is within one of our ranges */ 1155 for (;r < lim; r += 2) { 1156 if (n < r[0] || n > r[1]) 1157 continue; 1158 /* Found a valid entry, copy and we are done */ 1159 if (a->flags & DN_C_LINK) { 1160 if (copy_obj(a->start, a->end, 1161 &s->link, "link", n)) 1162 return DNHT_SCAN_END; 1163 if (copy_profile(a, s->profile)) 1164 return DNHT_SCAN_END; 1165 if (copy_flowset(a, s->fs, 0)) 1166 return DNHT_SCAN_END; 1167 } 1168 if (a->flags & DN_C_SCH) { 1169 if (copy_obj(a->start, a->end, 1170 &s->sch, "sched", n)) 1171 return DNHT_SCAN_END; 1172 /* list all attached flowsets */ 1173 if (copy_fsk_list(a, s, 0)) 1174 return DNHT_SCAN_END; 1175 } 1176 if (a->flags & DN_C_FLOW) 1177 copy_si(a, s, 0); 1178 break; 1179 } 1180 } else if (a->type == DN_FS) { 1181 /* queue show, skip internal flowsets */ 1182 struct dn_fsk *fs = _o; 1183 1184 n = fs->fs.fs_nr; 1185 if (n >= DN_MAX_ID) 1186 return 0; 1187 /* see if the object is within one of our ranges */ 1188 for (;r < lim; r += 2) { 1189 if (n < r[0] || n > r[1]) 1190 continue; 1191 if (copy_flowset(a, fs, 0)) 1192 return DNHT_SCAN_END; 1193 copy_q(a, fs, 0); 1194 break; /* we are done */ 1195 } 1196 } 1197 return 0; 1198 } 1199 1200 static inline struct dn_schk * 1201 locate_scheduler(int i) 1202 { 1203 return dn_ht_find(V_dn_cfg.schedhash, i, 0, NULL); 1204 } 1205 1206 /* 1207 * red parameters are in fixed point arithmetic. 1208 */ 1209 static int 1210 config_red(struct dn_fsk *fs) 1211 { 1212 int64_t s, idle, weight, w0; 1213 int t, i; 1214 1215 fs->w_q = fs->fs.w_q; 1216 fs->max_p = fs->fs.max_p; 1217 ND("called"); 1218 /* Doing stuff that was in userland */ 1219 i = fs->sched->link.bandwidth; 1220 s = (i <= 0) ? 0 : 1221 hz * V_dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i; 1222 1223 idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */ 1224 fs->lookup_step = div64(idle , V_dn_cfg.red_lookup_depth); 1225 /* fs->lookup_step not scaled, */ 1226 if (!fs->lookup_step) 1227 fs->lookup_step = 1; 1228 w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled 1229 1230 for (t = fs->lookup_step; t > 1; --t) 1231 weight = SCALE_MUL(weight, w0); 1232 fs->lookup_weight = (int)(weight); // scaled 1233 1234 /* Now doing stuff that was in kerneland */ 1235 fs->min_th = SCALE(fs->fs.min_th); 1236 fs->max_th = SCALE(fs->fs.max_th); 1237 1238 if (fs->fs.max_th == fs->fs.min_th) 1239 fs->c_1 = fs->max_p; 1240 else 1241 fs->c_1 = SCALE((int64_t)(fs->max_p)) / (fs->fs.max_th - fs->fs.min_th); 1242 fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th)); 1243 1244 if (fs->fs.flags & DN_IS_GENTLE_RED) { 1245 fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th; 1246 fs->c_4 = SCALE(1) - 2 * fs->max_p; 1247 } 1248 1249 /* If the lookup table already exist, free and create it again. */ 1250 free(fs->w_q_lookup, M_DUMMYNET); 1251 fs->w_q_lookup = NULL; 1252 if (V_dn_cfg.red_lookup_depth == 0) { 1253 printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth" 1254 "must be > 0\n"); 1255 fs->fs.flags &= ~DN_IS_RED; 1256 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1257 return (EINVAL); 1258 } 1259 fs->lookup_depth = V_dn_cfg.red_lookup_depth; 1260 fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int), 1261 M_DUMMYNET, M_NOWAIT); 1262 if (fs->w_q_lookup == NULL) { 1263 printf("dummynet: sorry, cannot allocate red lookup table\n"); 1264 fs->fs.flags &= ~DN_IS_RED; 1265 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1266 return(ENOSPC); 1267 } 1268 1269 /* Fill the lookup table with (1 - w_q)^x */ 1270 fs->w_q_lookup[0] = SCALE(1) - fs->w_q; 1271 1272 for (i = 1; i < fs->lookup_depth; i++) 1273 fs->w_q_lookup[i] = 1274 SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight); 1275 1276 if (V_dn_cfg.red_avg_pkt_size < 1) 1277 V_dn_cfg.red_avg_pkt_size = 512; 1278 fs->avg_pkt_size = V_dn_cfg.red_avg_pkt_size; 1279 if (V_dn_cfg.red_max_pkt_size < 1) 1280 V_dn_cfg.red_max_pkt_size = 1500; 1281 fs->max_pkt_size = V_dn_cfg.red_max_pkt_size; 1282 ND("exit"); 1283 return 0; 1284 } 1285 1286 /* Scan all flowset attached to this scheduler and update red */ 1287 static void 1288 update_red(struct dn_schk *s) 1289 { 1290 struct dn_fsk *fs; 1291 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 1292 if (fs && (fs->fs.flags & DN_IS_RED)) 1293 config_red(fs); 1294 } 1295 } 1296 1297 /* attach flowset to scheduler s, possibly requeue */ 1298 static void 1299 fsk_attach(struct dn_fsk *fs, struct dn_schk *s) 1300 { 1301 ND("remove fs %d from fsunlinked, link to sched %d", 1302 fs->fs.fs_nr, s->sch.sched_nr); 1303 SLIST_REMOVE(&V_dn_cfg.fsu, fs, dn_fsk, sch_chain); 1304 fs->sched = s; 1305 SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain); 1306 if (s->fp->new_fsk) 1307 s->fp->new_fsk(fs); 1308 /* XXX compute fsk_mask */ 1309 fs->fsk_mask = fs->fs.flow_mask; 1310 if (fs->sched->sch.flags & DN_HAVE_MASK) 1311 flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask); 1312 if (fs->qht) { 1313 /* 1314 * we must drain qht according to the old 1315 * type, and reinsert according to the new one. 1316 * The requeue is complex -- in general we need to 1317 * reclassify every single packet. 1318 * For the time being, let's hope qht is never set 1319 * when we reach this point. 1320 */ 1321 D("XXX TODO requeue from fs %d to sch %d", 1322 fs->fs.fs_nr, s->sch.sched_nr); 1323 fs->qht = NULL; 1324 } 1325 /* set the new type for qht */ 1326 if (nonzero_mask(&fs->fsk_mask)) 1327 fs->fs.flags |= DN_QHT_HASH; 1328 else 1329 fs->fs.flags &= ~DN_QHT_HASH; 1330 1331 /* XXX config_red() can fail... */ 1332 if (fs->fs.flags & DN_IS_RED) 1333 config_red(fs); 1334 } 1335 1336 /* update all flowsets which may refer to this scheduler */ 1337 static void 1338 update_fs(struct dn_schk *s) 1339 { 1340 struct dn_fsk *fs, *tmp; 1341 1342 SLIST_FOREACH_SAFE(fs, &V_dn_cfg.fsu, sch_chain, tmp) { 1343 if (s->sch.sched_nr != fs->fs.sched_nr) { 1344 D("fs %d for sch %d not %d still unlinked", 1345 fs->fs.fs_nr, fs->fs.sched_nr, 1346 s->sch.sched_nr); 1347 continue; 1348 } 1349 fsk_attach(fs, s); 1350 } 1351 } 1352 1353 #ifdef NEW_AQM 1354 /* Retrieve AQM configurations to ipfw userland 1355 */ 1356 static int 1357 get_aqm_parms(struct sockopt *sopt) 1358 { 1359 struct dn_extra_parms *ep; 1360 struct dn_fsk *fs; 1361 size_t sopt_valsize; 1362 int l, err = 0; 1363 1364 sopt_valsize = sopt->sopt_valsize; 1365 l = sizeof(*ep); 1366 if (sopt->sopt_valsize < l) { 1367 D("bad len sopt->sopt_valsize %d len %d", 1368 (int) sopt->sopt_valsize , l); 1369 err = EINVAL; 1370 return err; 1371 } 1372 ep = malloc(l, M_DUMMYNET, M_NOWAIT); 1373 if(!ep) { 1374 err = ENOMEM ; 1375 return err; 1376 } 1377 do { 1378 err = sooptcopyin(sopt, ep, l, l); 1379 if(err) 1380 break; 1381 sopt->sopt_valsize = sopt_valsize; 1382 if (ep->oid.len < l) { 1383 err = EINVAL; 1384 break; 1385 } 1386 1387 fs = dn_ht_find(V_dn_cfg.fshash, ep->nr, 0, NULL); 1388 if (!fs) { 1389 D("fs %d not found", ep->nr); 1390 err = EINVAL; 1391 break; 1392 } 1393 1394 if (fs->aqmfp && fs->aqmfp->getconfig) { 1395 if(fs->aqmfp->getconfig(fs, ep)) { 1396 D("Error while trying to get AQM params"); 1397 err = EINVAL; 1398 break; 1399 } 1400 ep->oid.len = l; 1401 err = sooptcopyout(sopt, ep, l); 1402 } 1403 }while(0); 1404 1405 free(ep, M_DUMMYNET); 1406 return err; 1407 } 1408 1409 /* Retrieve AQM configurations to ipfw userland 1410 */ 1411 static int 1412 get_sched_parms(struct sockopt *sopt) 1413 { 1414 struct dn_extra_parms *ep; 1415 struct dn_schk *schk; 1416 size_t sopt_valsize; 1417 int l, err = 0; 1418 1419 sopt_valsize = sopt->sopt_valsize; 1420 l = sizeof(*ep); 1421 if (sopt->sopt_valsize < l) { 1422 D("bad len sopt->sopt_valsize %d len %d", 1423 (int) sopt->sopt_valsize , l); 1424 err = EINVAL; 1425 return err; 1426 } 1427 ep = malloc(l, M_DUMMYNET, M_NOWAIT); 1428 if(!ep) { 1429 err = ENOMEM ; 1430 return err; 1431 } 1432 do { 1433 err = sooptcopyin(sopt, ep, l, l); 1434 if(err) 1435 break; 1436 sopt->sopt_valsize = sopt_valsize; 1437 if (ep->oid.len < l) { 1438 err = EINVAL; 1439 break; 1440 } 1441 1442 schk = locate_scheduler(ep->nr); 1443 if (!schk) { 1444 D("sched %d not found", ep->nr); 1445 err = EINVAL; 1446 break; 1447 } 1448 1449 if (schk->fp && schk->fp->getconfig) { 1450 if(schk->fp->getconfig(schk, ep)) { 1451 D("Error while trying to get sched params"); 1452 err = EINVAL; 1453 break; 1454 } 1455 ep->oid.len = l; 1456 err = sooptcopyout(sopt, ep, l); 1457 } 1458 }while(0); 1459 free(ep, M_DUMMYNET); 1460 1461 return err; 1462 } 1463 1464 /* Configure AQM for flowset 'fs'. 1465 * extra parameters are passed from userland. 1466 */ 1467 static int 1468 config_aqm(struct dn_fsk *fs, struct dn_extra_parms *ep, int busy) 1469 { 1470 int err = 0; 1471 1472 NET_EPOCH_ASSERT(); 1473 1474 do { 1475 /* no configurations */ 1476 if (!ep) { 1477 err = 0; 1478 break; 1479 } 1480 1481 /* no AQM for this flowset*/ 1482 if (!strcmp(ep->name,"")) { 1483 err = 0; 1484 break; 1485 } 1486 if (ep->oid.len < sizeof(*ep)) { 1487 D("short aqm len %d", ep->oid.len); 1488 err = EINVAL; 1489 break; 1490 } 1491 1492 if (busy) { 1493 D("Unable to configure flowset, flowset busy!"); 1494 err = EINVAL; 1495 break; 1496 } 1497 1498 /* deconfigure old aqm if exist */ 1499 if (fs->aqmcfg && fs->aqmfp && fs->aqmfp->deconfig) { 1500 aqm_cleanup_deconfig_fs(fs); 1501 } 1502 1503 if (!(fs->aqmfp = find_aqm_type(0, ep->name))) { 1504 D("AQM functions not found for type %s!", ep->name); 1505 fs->fs.flags &= ~DN_IS_AQM; 1506 err = EINVAL; 1507 break; 1508 } else 1509 fs->fs.flags |= DN_IS_AQM; 1510 1511 if (ep->oid.subtype != DN_AQM_PARAMS) { 1512 D("Wrong subtype"); 1513 err = EINVAL; 1514 break; 1515 } 1516 1517 if (fs->aqmfp->config) { 1518 err = fs->aqmfp->config(fs, ep, ep->oid.len); 1519 if (err) { 1520 D("Unable to configure AQM for FS %d", fs->fs.fs_nr ); 1521 fs->fs.flags &= ~DN_IS_AQM; 1522 fs->aqmfp = NULL; 1523 break; 1524 } 1525 } 1526 } while(0); 1527 1528 return err; 1529 } 1530 #endif 1531 1532 /* 1533 * Configuration -- to preserve backward compatibility we use 1534 * the following scheme (N is 65536) 1535 * NUMBER SCHED LINK FLOWSET 1536 * 1 .. N-1 (1)WFQ (2)WFQ (3)queue 1537 * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1 1538 * 2N+1 .. 3N-1 -- -- (7)FIFO for sched N+1..2N-1 1539 * 1540 * "pipe i config" configures #1, #2 and #3 1541 * "sched i config" configures #1 and possibly #6 1542 * "queue i config" configures #3 1543 * #1 is configured with 'pipe i config' or 'sched i config' 1544 * #2 is configured with 'pipe i config', and created if not 1545 * existing with 'sched i config' 1546 * #3 is configured with 'queue i config' 1547 * #4 is automatically configured after #1, can only be FIFO 1548 * #5 is automatically configured after #2 1549 * #6 is automatically created when #1 is !MULTIQUEUE, 1550 * and can be updated. 1551 * #7 is automatically configured after #2 1552 */ 1553 1554 /* 1555 * configure a link (and its FIFO instance) 1556 */ 1557 static int 1558 config_link(struct dn_link *p, struct dn_id *arg) 1559 { 1560 int i; 1561 1562 if (p->oid.len != sizeof(*p)) { 1563 D("invalid pipe len %d", p->oid.len); 1564 return EINVAL; 1565 } 1566 i = p->link_nr; 1567 if (i <= 0 || i >= DN_MAX_ID) 1568 return EINVAL; 1569 /* 1570 * The config program passes parameters as follows: 1571 * bw = bits/second (0 means no limits), 1572 * delay = ms, must be translated into ticks. 1573 * qsize = slots/bytes 1574 * burst ??? 1575 */ 1576 p->delay = (p->delay * hz) / 1000; 1577 /* Scale burst size: bytes -> bits * hz */ 1578 p->burst *= 8 * hz; 1579 1580 DN_BH_WLOCK(); 1581 /* do it twice, base link and FIFO link */ 1582 for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { 1583 struct dn_schk *s = locate_scheduler(i); 1584 if (s == NULL) { 1585 DN_BH_WUNLOCK(); 1586 D("sched %d not found", i); 1587 return EINVAL; 1588 } 1589 /* remove profile if exists */ 1590 free(s->profile, M_DUMMYNET); 1591 s->profile = NULL; 1592 1593 /* copy all parameters */ 1594 s->link.oid = p->oid; 1595 s->link.link_nr = i; 1596 s->link.delay = p->delay; 1597 if (s->link.bandwidth != p->bandwidth) { 1598 /* XXX bandwidth changes, need to update red params */ 1599 s->link.bandwidth = p->bandwidth; 1600 update_red(s); 1601 } 1602 s->link.burst = p->burst; 1603 schk_reset_credit(s); 1604 } 1605 V_dn_cfg.id++; 1606 DN_BH_WUNLOCK(); 1607 return 0; 1608 } 1609 1610 /* 1611 * configure a flowset. Can be called from inside with locked=1, 1612 */ 1613 static struct dn_fsk * 1614 config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked) 1615 { 1616 int i; 1617 struct dn_fsk *fs; 1618 #ifdef NEW_AQM 1619 struct dn_extra_parms *ep; 1620 #endif 1621 1622 if (nfs->oid.len != sizeof(*nfs)) { 1623 D("invalid flowset len %d", nfs->oid.len); 1624 return NULL; 1625 } 1626 i = nfs->fs_nr; 1627 if (i <= 0 || i >= 3*DN_MAX_ID) 1628 return NULL; 1629 #ifdef NEW_AQM 1630 ep = NULL; 1631 if (arg != NULL) { 1632 ep = malloc(sizeof(*ep), M_TEMP, M_NOWAIT); 1633 if (ep == NULL) 1634 return (NULL); 1635 memcpy(ep, arg, sizeof(*ep)); 1636 } 1637 #endif 1638 ND("flowset %d", i); 1639 /* XXX other sanity checks */ 1640 if (nfs->flags & DN_QSIZE_BYTES) { 1641 ipdn_bound_var(&nfs->qsize, 16384, 1642 1500, V_dn_cfg.byte_limit, NULL); // "queue byte size"); 1643 } else { 1644 ipdn_bound_var(&nfs->qsize, 50, 1645 1, V_dn_cfg.slot_limit, NULL); // "queue slot size"); 1646 } 1647 if (nfs->flags & DN_HAVE_MASK) { 1648 /* make sure we have some buckets */ 1649 ipdn_bound_var((int *)&nfs->buckets, V_dn_cfg.hash_size, 1650 1, V_dn_cfg.max_hash_size, "flowset buckets"); 1651 } else { 1652 nfs->buckets = 1; /* we only need 1 */ 1653 } 1654 if (!locked) 1655 DN_BH_WLOCK(); 1656 do { /* exit with break when done */ 1657 struct dn_schk *s; 1658 int flags = nfs->sched_nr ? DNHT_INSERT : 0; 1659 int j; 1660 int oldc = V_dn_cfg.fsk_count; 1661 fs = dn_ht_find(V_dn_cfg.fshash, i, flags, NULL); 1662 if (fs == NULL) { 1663 D("missing sched for flowset %d", i); 1664 break; 1665 } 1666 /* grab some defaults from the existing one */ 1667 if (nfs->sched_nr == 0) /* reuse */ 1668 nfs->sched_nr = fs->fs.sched_nr; 1669 for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) { 1670 if (nfs->par[j] == -1) /* reuse */ 1671 nfs->par[j] = fs->fs.par[j]; 1672 } 1673 if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) { 1674 ND("flowset %d unchanged", i); 1675 #ifdef NEW_AQM 1676 if (ep != NULL) { 1677 /* 1678 * Reconfigure AQM as the parameters can be changed. 1679 * We consider the flowset as busy if it has scheduler 1680 * instance(s). 1681 */ 1682 s = locate_scheduler(nfs->sched_nr); 1683 config_aqm(fs, ep, s != NULL && s->siht != NULL); 1684 } 1685 #endif 1686 break; /* no change, nothing to do */ 1687 } 1688 if (oldc != V_dn_cfg.fsk_count) /* new item */ 1689 V_dn_cfg.id++; 1690 s = locate_scheduler(nfs->sched_nr); 1691 /* detach from old scheduler if needed, preserving 1692 * queues if we need to reattach. Then update the 1693 * configuration, and possibly attach to the new sched. 1694 */ 1695 DX(2, "fs %d changed sched %d@%p to %d@%p", 1696 fs->fs.fs_nr, 1697 fs->fs.sched_nr, fs->sched, nfs->sched_nr, s); 1698 if (fs->sched) { 1699 int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY); 1700 flags |= DN_DESTROY; /* XXX temporary */ 1701 fsk_detach(fs, flags); 1702 } 1703 fs->fs = *nfs; /* copy configuration */ 1704 #ifdef NEW_AQM 1705 fs->aqmfp = NULL; 1706 if (ep != NULL) 1707 config_aqm(fs, ep, s != NULL && 1708 s->siht != NULL); 1709 #endif 1710 if (s != NULL) 1711 fsk_attach(fs, s); 1712 } while (0); 1713 if (!locked) 1714 DN_BH_WUNLOCK(); 1715 #ifdef NEW_AQM 1716 free(ep, M_TEMP); 1717 #endif 1718 return fs; 1719 } 1720 1721 /* 1722 * config/reconfig a scheduler and its FIFO variant. 1723 * For !MULTIQUEUE schedulers, also set up the flowset. 1724 * 1725 * On reconfigurations (detected because s->fp is set), 1726 * detach existing flowsets preserving traffic, preserve link, 1727 * and delete the old scheduler creating a new one. 1728 */ 1729 static int 1730 config_sched(struct dn_sch *_nsch, struct dn_id *arg) 1731 { 1732 struct dn_schk *s; 1733 struct schk_new_arg a; /* argument for schk_new */ 1734 int i; 1735 struct dn_link p; /* copy of oldlink */ 1736 struct dn_profile *pf = NULL; /* copy of old link profile */ 1737 /* Used to preserv mask parameter */ 1738 struct ipfw_flow_id new_mask; 1739 int new_buckets = 0; 1740 int new_flags = 0; 1741 int pipe_cmd; 1742 int err = ENOMEM; 1743 1744 NET_EPOCH_ASSERT(); 1745 1746 a.sch = _nsch; 1747 if (a.sch->oid.len != sizeof(*a.sch)) { 1748 D("bad sched len %d", a.sch->oid.len); 1749 return EINVAL; 1750 } 1751 i = a.sch->sched_nr; 1752 if (i <= 0 || i >= DN_MAX_ID) 1753 return EINVAL; 1754 /* make sure we have some buckets */ 1755 if (a.sch->flags & DN_HAVE_MASK) 1756 ipdn_bound_var((int *)&a.sch->buckets, V_dn_cfg.hash_size, 1757 1, V_dn_cfg.max_hash_size, "sched buckets"); 1758 /* XXX other sanity checks */ 1759 bzero(&p, sizeof(p)); 1760 1761 pipe_cmd = a.sch->flags & DN_PIPE_CMD; 1762 a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set? 1763 if (pipe_cmd) { 1764 /* Copy mask parameter */ 1765 new_mask = a.sch->sched_mask; 1766 new_buckets = a.sch->buckets; 1767 new_flags = a.sch->flags; 1768 } 1769 DN_BH_WLOCK(); 1770 again: /* run twice, for wfq and fifo */ 1771 /* 1772 * lookup the type. If not supplied, use the previous one 1773 * or default to WF2Q+. Otherwise, return an error. 1774 */ 1775 V_dn_cfg.id++; 1776 a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name); 1777 if (a.fp != NULL) { 1778 /* found. Lookup or create entry */ 1779 s = dn_ht_find(V_dn_cfg.schedhash, i, DNHT_INSERT, &a); 1780 } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) { 1781 /* No type. search existing s* or retry with WF2Q+ */ 1782 s = dn_ht_find(V_dn_cfg.schedhash, i, 0, &a); 1783 if (s != NULL) { 1784 a.fp = s->fp; 1785 /* Scheduler exists, skip to FIFO scheduler 1786 * if command was pipe config... 1787 */ 1788 if (pipe_cmd) 1789 goto next; 1790 } else { 1791 /* New scheduler, create a wf2q+ with no mask 1792 * if command was pipe config... 1793 */ 1794 if (pipe_cmd) { 1795 /* clear mask parameter */ 1796 bzero(&a.sch->sched_mask, sizeof(new_mask)); 1797 a.sch->buckets = 0; 1798 a.sch->flags &= ~DN_HAVE_MASK; 1799 } 1800 a.sch->oid.subtype = DN_SCHED_WF2QP; 1801 goto again; 1802 } 1803 } else { 1804 D("invalid scheduler type %d %s", 1805 a.sch->oid.subtype, a.sch->name); 1806 err = EINVAL; 1807 goto error; 1808 } 1809 /* normalize name and subtype */ 1810 a.sch->oid.subtype = a.fp->type; 1811 bzero(a.sch->name, sizeof(a.sch->name)); 1812 strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name)); 1813 if (s == NULL) { 1814 D("cannot allocate scheduler %d", i); 1815 goto error; 1816 } 1817 /* restore existing link if any */ 1818 if (p.link_nr) { 1819 s->link = p; 1820 if (!pf || pf->link_nr != p.link_nr) { /* no saved value */ 1821 s->profile = NULL; /* XXX maybe not needed */ 1822 } else { 1823 s->profile = malloc(sizeof(struct dn_profile), 1824 M_DUMMYNET, M_NOWAIT | M_ZERO); 1825 if (s->profile == NULL) { 1826 D("cannot allocate profile"); 1827 goto error; //XXX 1828 } 1829 memcpy(s->profile, pf, sizeof(*pf)); 1830 } 1831 } 1832 p.link_nr = 0; 1833 if (s->fp == NULL) { 1834 DX(2, "sched %d new type %s", i, a.fp->name); 1835 } else if (s->fp != a.fp || 1836 bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) { 1837 /* already existing. */ 1838 DX(2, "sched %d type changed from %s to %s", 1839 i, s->fp->name, a.fp->name); 1840 DX(4, " type/sub %d/%d -> %d/%d", 1841 s->sch.oid.type, s->sch.oid.subtype, 1842 a.sch->oid.type, a.sch->oid.subtype); 1843 if (s->link.link_nr == 0) 1844 D("XXX WARNING link 0 for sched %d", i); 1845 p = s->link; /* preserve link */ 1846 if (s->profile) {/* preserve profile */ 1847 if (!pf) 1848 pf = malloc(sizeof(*pf), 1849 M_DUMMYNET, M_NOWAIT | M_ZERO); 1850 if (pf) /* XXX should issue a warning otherwise */ 1851 memcpy(pf, s->profile, sizeof(*pf)); 1852 } 1853 /* remove from the hash */ 1854 dn_ht_find(V_dn_cfg.schedhash, i, DNHT_REMOVE, NULL); 1855 /* Detach flowsets, preserve queues. */ 1856 // schk_delete_cb(s, NULL); 1857 // XXX temporarily, kill queues 1858 schk_delete_cb(s, (void *)DN_DESTROY); 1859 goto again; 1860 } else { 1861 DX(4, "sched %d unchanged type %s", i, a.fp->name); 1862 } 1863 /* complete initialization */ 1864 s->sch = *a.sch; 1865 s->fp = a.fp; 1866 s->cfg = arg; 1867 // XXX schk_reset_credit(s); 1868 /* create the internal flowset if needed, 1869 * trying to reuse existing ones if available 1870 */ 1871 if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) { 1872 s->fs = dn_ht_find(V_dn_cfg.fshash, i, 0, NULL); 1873 if (!s->fs) { 1874 struct dn_fs fs; 1875 bzero(&fs, sizeof(fs)); 1876 set_oid(&fs.oid, DN_FS, sizeof(fs)); 1877 fs.fs_nr = i + DN_MAX_ID; 1878 fs.sched_nr = i; 1879 s->fs = config_fs(&fs, NULL, 1 /* locked */); 1880 } 1881 if (!s->fs) { 1882 schk_delete_cb(s, (void *)DN_DESTROY); 1883 D("error creating internal fs for %d", i); 1884 goto error; 1885 } 1886 } 1887 /* call init function after the flowset is created */ 1888 if (s->fp->config) 1889 s->fp->config(s); 1890 update_fs(s); 1891 next: 1892 if (i < DN_MAX_ID) { /* now configure the FIFO instance */ 1893 i += DN_MAX_ID; 1894 if (pipe_cmd) { 1895 /* Restore mask parameter for FIFO */ 1896 a.sch->sched_mask = new_mask; 1897 a.sch->buckets = new_buckets; 1898 a.sch->flags = new_flags; 1899 } else { 1900 /* sched config shouldn't modify the FIFO scheduler */ 1901 if (dn_ht_find(V_dn_cfg.schedhash, i, 0, &a) != NULL) { 1902 /* FIFO already exist, don't touch it */ 1903 err = 0; /* and this is not an error */ 1904 goto error; 1905 } 1906 } 1907 a.sch->sched_nr = i; 1908 a.sch->oid.subtype = DN_SCHED_FIFO; 1909 bzero(a.sch->name, sizeof(a.sch->name)); 1910 goto again; 1911 } 1912 err = 0; 1913 error: 1914 DN_BH_WUNLOCK(); 1915 free(pf, M_DUMMYNET); 1916 return err; 1917 } 1918 1919 /* 1920 * attach a profile to a link 1921 */ 1922 static int 1923 config_profile(struct dn_profile *pf, struct dn_id *arg) 1924 { 1925 struct dn_schk *s; 1926 int i, olen, err = 0; 1927 1928 if (pf->oid.len < sizeof(*pf)) { 1929 D("short profile len %d", pf->oid.len); 1930 return EINVAL; 1931 } 1932 i = pf->link_nr; 1933 if (i <= 0 || i >= DN_MAX_ID) 1934 return EINVAL; 1935 /* XXX other sanity checks */ 1936 DN_BH_WLOCK(); 1937 for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { 1938 s = locate_scheduler(i); 1939 1940 if (s == NULL) { 1941 err = EINVAL; 1942 break; 1943 } 1944 V_dn_cfg.id++; 1945 /* 1946 * If we had a profile and the new one does not fit, 1947 * or it is deleted, then we need to free memory. 1948 */ 1949 if (s->profile && (pf->samples_no == 0 || 1950 s->profile->oid.len < pf->oid.len)) { 1951 free(s->profile, M_DUMMYNET); 1952 s->profile = NULL; 1953 } 1954 if (pf->samples_no == 0) 1955 continue; 1956 /* 1957 * new profile, possibly allocate memory 1958 * and copy data. 1959 */ 1960 if (s->profile == NULL) 1961 s->profile = malloc(pf->oid.len, 1962 M_DUMMYNET, M_NOWAIT | M_ZERO); 1963 if (s->profile == NULL) { 1964 D("no memory for profile %d", i); 1965 err = ENOMEM; 1966 break; 1967 } 1968 /* preserve larger length XXX double check */ 1969 olen = s->profile->oid.len; 1970 if (olen < pf->oid.len) 1971 olen = pf->oid.len; 1972 memcpy(s->profile, pf, pf->oid.len); 1973 s->profile->oid.len = olen; 1974 } 1975 DN_BH_WUNLOCK(); 1976 return err; 1977 } 1978 1979 /* 1980 * Delete all objects: 1981 */ 1982 static void 1983 dummynet_flush(void) 1984 { 1985 1986 /* delete all schedulers and related links/queues/flowsets */ 1987 dn_ht_scan(V_dn_cfg.schedhash, schk_delete_cb, 1988 (void *)(uintptr_t)DN_DELETE_FS); 1989 /* delete all remaining (unlinked) flowsets */ 1990 DX(4, "still %d unlinked fs", V_dn_cfg.fsk_count); 1991 dn_ht_free(V_dn_cfg.fshash, DNHT_REMOVE); 1992 fsk_detach_list(&V_dn_cfg.fsu, DN_DELETE_FS); 1993 /* Reinitialize system heap... */ 1994 heap_init(&V_dn_cfg.evheap, 16, offsetof(struct dn_id, id)); 1995 } 1996 1997 /* 1998 * Main handler for configuration. We are guaranteed to be called 1999 * with an oid which is at least a dn_id. 2000 * - the first object is the command (config, delete, flush, ...) 2001 * - config_link must be issued after the corresponding config_sched 2002 * - parameters (DN_TXT) for an object must precede the object 2003 * processed on a config_sched. 2004 */ 2005 int 2006 do_config(void *p, size_t l) 2007 { 2008 struct dn_id o; 2009 union { 2010 struct dn_profile profile; 2011 struct dn_fs fs; 2012 struct dn_link link; 2013 struct dn_sch sched; 2014 } *dn; 2015 struct dn_id *arg; 2016 uintptr_t a; 2017 int err, err2, off; 2018 2019 memcpy(&o, p, sizeof(o)); 2020 if (o.id != DN_API_VERSION) { 2021 D("invalid api version got %d need %d", o.id, DN_API_VERSION); 2022 return EINVAL; 2023 } 2024 arg = NULL; 2025 dn = NULL; 2026 off = 0; 2027 while (l >= sizeof(o)) { 2028 memcpy(&o, (char *)p + off, sizeof(o)); 2029 if (o.len < sizeof(o) || l < o.len) { 2030 D("bad len o.len %d len %zu", o.len, l); 2031 err = EINVAL; 2032 break; 2033 } 2034 l -= o.len; 2035 err = 0; 2036 switch (o.type) { 2037 default: 2038 D("cmd %d not implemented", o.type); 2039 break; 2040 2041 #ifdef EMULATE_SYSCTL 2042 /* sysctl emulation. 2043 * if we recognize the command, jump to the correct 2044 * handler and return 2045 */ 2046 case DN_SYSCTL_SET: 2047 err = kesysctl_emu_set(p, l); 2048 return err; 2049 #endif 2050 2051 case DN_CMD_CONFIG: /* simply a header */ 2052 break; 2053 2054 case DN_CMD_DELETE: 2055 /* the argument is in the first uintptr_t after o */ 2056 if (o.len < sizeof(o) + sizeof(a)) { 2057 err = EINVAL; 2058 break; 2059 } 2060 memcpy(&a, (char *)p + off + sizeof(o), sizeof(a)); 2061 switch (o.subtype) { 2062 case DN_LINK: 2063 /* delete base and derived schedulers */ 2064 DN_BH_WLOCK(); 2065 err = delete_schk(a); 2066 err2 = delete_schk(a + DN_MAX_ID); 2067 DN_BH_WUNLOCK(); 2068 if (!err) 2069 err = err2; 2070 break; 2071 2072 default: 2073 D("invalid delete type %d", o.subtype); 2074 err = EINVAL; 2075 break; 2076 2077 case DN_FS: 2078 err = (a < 1 || a >= DN_MAX_ID) ? 2079 EINVAL : delete_fs(a, 0) ; 2080 break; 2081 } 2082 break; 2083 2084 case DN_CMD_FLUSH: 2085 DN_BH_WLOCK(); 2086 dummynet_flush(); 2087 DN_BH_WUNLOCK(); 2088 break; 2089 case DN_TEXT: /* store argument of next block */ 2090 free(arg, M_TEMP); 2091 arg = malloc(o.len, M_TEMP, M_NOWAIT); 2092 if (arg == NULL) { 2093 err = ENOMEM; 2094 break; 2095 } 2096 memcpy(arg, (char *)p + off, o.len); 2097 break; 2098 case DN_LINK: 2099 if (dn == NULL) 2100 dn = malloc(sizeof(*dn), M_TEMP, M_NOWAIT); 2101 if (dn == NULL) { 2102 err = ENOMEM; 2103 break; 2104 } 2105 memcpy(&dn->link, (char *)p + off, sizeof(dn->link)); 2106 err = config_link(&dn->link, arg); 2107 break; 2108 case DN_PROFILE: 2109 if (dn == NULL) 2110 dn = malloc(sizeof(*dn), M_TEMP, M_NOWAIT); 2111 if (dn == NULL) { 2112 err = ENOMEM; 2113 break; 2114 } 2115 memcpy(&dn->profile, (char *)p + off, 2116 sizeof(dn->profile)); 2117 err = config_profile(&dn->profile, arg); 2118 break; 2119 case DN_SCH: 2120 if (dn == NULL) 2121 dn = malloc(sizeof(*dn), M_TEMP, M_NOWAIT); 2122 if (dn == NULL) { 2123 err = ENOMEM; 2124 break; 2125 } 2126 memcpy(&dn->sched, (char *)p + off, 2127 sizeof(dn->sched)); 2128 err = config_sched(&dn->sched, arg); 2129 break; 2130 case DN_FS: 2131 if (dn == NULL) 2132 dn = malloc(sizeof(*dn), M_TEMP, M_NOWAIT); 2133 if (dn == NULL) { 2134 err = ENOMEM; 2135 break; 2136 } 2137 memcpy(&dn->fs, (char *)p + off, sizeof(dn->fs)); 2138 err = (NULL == config_fs(&dn->fs, arg, 0)); 2139 break; 2140 } 2141 if (err != 0) 2142 break; 2143 off += o.len; 2144 } 2145 free(arg, M_TEMP); 2146 free(dn, M_TEMP); 2147 return err; 2148 } 2149 2150 static int 2151 compute_space(struct dn_id *cmd, struct copy_args *a) 2152 { 2153 int x = 0, need = 0; 2154 int profile_size = sizeof(struct dn_profile) - 2155 ED_MAX_SAMPLES_NO*sizeof(int); 2156 2157 /* NOTE about compute space: 2158 * NP = V_dn_cfg.schk_count 2159 * NSI = V_dn_cfg.si_count 2160 * NF = V_dn_cfg.fsk_count 2161 * NQ = V_dn_cfg.queue_count 2162 * - ipfw pipe show 2163 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 2164 * link, scheduler template, flowset 2165 * integrated in scheduler and header 2166 * for flowset list 2167 * (NSI)*(dn_flow) all scheduler instance (includes 2168 * the queue instance) 2169 * - ipfw sched show 2170 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 2171 * link, scheduler template, flowset 2172 * integrated in scheduler and header 2173 * for flowset list 2174 * (NSI * dn_flow) all scheduler instances 2175 * (NF * sizeof(uint_32)) space for flowset list linked to scheduler 2176 * (NQ * dn_queue) all queue [XXXfor now not listed] 2177 * - ipfw queue show 2178 * (NF * dn_fs) all flowset 2179 * (NQ * dn_queue) all queues 2180 */ 2181 switch (cmd->subtype) { 2182 default: 2183 return -1; 2184 /* XXX where do LINK and SCH differ ? */ 2185 /* 'ipfw sched show' could list all queues associated to 2186 * a scheduler. This feature for now is disabled 2187 */ 2188 case DN_LINK: /* pipe show */ 2189 x = DN_C_LINK | DN_C_SCH | DN_C_FLOW; 2190 need += V_dn_cfg.schk_count * 2191 (sizeof(struct dn_fs) + profile_size) / 2; 2192 need += V_dn_cfg.fsk_count * sizeof(uint32_t); 2193 break; 2194 case DN_SCH: /* sched show */ 2195 need += V_dn_cfg.schk_count * 2196 (sizeof(struct dn_fs) + profile_size) / 2; 2197 need += V_dn_cfg.fsk_count * sizeof(uint32_t); 2198 x = DN_C_SCH | DN_C_LINK | DN_C_FLOW; 2199 break; 2200 case DN_FS: /* queue show */ 2201 x = DN_C_FS | DN_C_QUEUE; 2202 break; 2203 case DN_GET_COMPAT: /* compatibility mode */ 2204 need = dn_compat_calc_size(); 2205 break; 2206 } 2207 a->flags = x; 2208 if (x & DN_C_SCH) { 2209 need += V_dn_cfg.schk_count * sizeof(struct dn_sch) / 2; 2210 /* NOT also, each fs might be attached to a sched */ 2211 need += V_dn_cfg.schk_count * sizeof(struct dn_id) / 2; 2212 } 2213 if (x & DN_C_FS) 2214 need += V_dn_cfg.fsk_count * sizeof(struct dn_fs); 2215 if (x & DN_C_LINK) { 2216 need += V_dn_cfg.schk_count * sizeof(struct dn_link) / 2; 2217 } 2218 /* 2219 * When exporting a queue to userland, only pass up the 2220 * struct dn_flow, which is the only visible part. 2221 */ 2222 2223 if (x & DN_C_QUEUE) 2224 need += V_dn_cfg.queue_count * sizeof(struct dn_flow); 2225 if (x & DN_C_FLOW) 2226 need += V_dn_cfg.si_count * (sizeof(struct dn_flow)); 2227 return need; 2228 } 2229 2230 /* 2231 * If compat != NULL dummynet_get is called in compatibility mode. 2232 * *compat will be the pointer to the buffer to pass to ipfw 2233 */ 2234 int 2235 dummynet_get(struct sockopt *sopt, void **compat) 2236 { 2237 int have, i, need, error; 2238 char *start = NULL, *buf; 2239 size_t sopt_valsize; 2240 struct dn_id *cmd; 2241 struct copy_args a; 2242 struct copy_range r; 2243 int l = sizeof(struct dn_id); 2244 2245 bzero(&a, sizeof(a)); 2246 bzero(&r, sizeof(r)); 2247 2248 /* save and restore original sopt_valsize around copyin */ 2249 sopt_valsize = sopt->sopt_valsize; 2250 2251 cmd = &r.o; 2252 2253 if (!compat) { 2254 /* copy at least an oid, and possibly a full object */ 2255 error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); 2256 sopt->sopt_valsize = sopt_valsize; 2257 if (error) 2258 goto done; 2259 l = cmd->len; 2260 #ifdef EMULATE_SYSCTL 2261 /* sysctl emulation. */ 2262 if (cmd->type == DN_SYSCTL_GET) 2263 return kesysctl_emu_get(sopt); 2264 #endif 2265 if (l > sizeof(r)) { 2266 /* request larger than default, allocate buffer */ 2267 cmd = malloc(l, M_DUMMYNET, M_NOWAIT); 2268 if (cmd == NULL) { 2269 error = ENOMEM; 2270 goto done; 2271 } 2272 error = sooptcopyin(sopt, cmd, l, l); 2273 sopt->sopt_valsize = sopt_valsize; 2274 if (error) 2275 goto done; 2276 } 2277 } else { /* compatibility */ 2278 error = 0; 2279 cmd->type = DN_CMD_GET; 2280 cmd->len = sizeof(struct dn_id); 2281 cmd->subtype = DN_GET_COMPAT; 2282 // cmd->id = sopt_valsize; 2283 D("compatibility mode"); 2284 } 2285 2286 #ifdef NEW_AQM 2287 /* get AQM params */ 2288 if(cmd->subtype == DN_AQM_PARAMS) { 2289 error = get_aqm_parms(sopt); 2290 goto done; 2291 /* get Scheduler params */ 2292 } else if (cmd->subtype == DN_SCH_PARAMS) { 2293 error = get_sched_parms(sopt); 2294 goto done; 2295 } 2296 #endif 2297 2298 a.extra = (struct copy_range *)cmd; 2299 if (cmd->len == sizeof(*cmd)) { /* no range, create a default */ 2300 uint32_t *rp = (uint32_t *)(cmd + 1); 2301 cmd->len += 2* sizeof(uint32_t); 2302 rp[0] = 1; 2303 rp[1] = DN_MAX_ID - 1; 2304 if (cmd->subtype == DN_LINK) { 2305 rp[0] += DN_MAX_ID; 2306 rp[1] += DN_MAX_ID; 2307 } 2308 } 2309 /* Count space (under lock) and allocate (outside lock). 2310 * Exit with lock held if we manage to get enough buffer. 2311 * Try a few times then give up. 2312 */ 2313 for (have = 0, i = 0; i < 10; i++) { 2314 DN_BH_WLOCK(); 2315 need = compute_space(cmd, &a); 2316 2317 /* if there is a range, ignore value from compute_space() */ 2318 if (l > sizeof(*cmd)) 2319 need = sopt_valsize - sizeof(*cmd); 2320 2321 if (need < 0) { 2322 DN_BH_WUNLOCK(); 2323 error = EINVAL; 2324 goto done; 2325 } 2326 need += sizeof(*cmd); 2327 cmd->id = need; 2328 if (have >= need) 2329 break; 2330 2331 DN_BH_WUNLOCK(); 2332 free(start, M_DUMMYNET); 2333 start = NULL; 2334 if (need > sopt_valsize) 2335 break; 2336 2337 have = need; 2338 start = malloc(have, M_DUMMYNET, M_NOWAIT | M_ZERO); 2339 } 2340 2341 if (start == NULL) { 2342 if (compat) { 2343 *compat = NULL; 2344 error = 1; // XXX 2345 } else { 2346 error = sooptcopyout(sopt, cmd, sizeof(*cmd)); 2347 } 2348 goto done; 2349 } 2350 ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, " 2351 "%d:%d si %d, %d:%d queues %d", 2352 V_dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH, 2353 V_dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK, 2354 V_dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS, 2355 V_dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I, 2356 V_dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE); 2357 sopt->sopt_valsize = sopt_valsize; 2358 a.type = cmd->subtype; 2359 2360 if (compat == NULL) { 2361 memcpy(start, cmd, sizeof(*cmd)); 2362 ((struct dn_id*)(start))->len = sizeof(struct dn_id); 2363 buf = start + sizeof(*cmd); 2364 } else 2365 buf = start; 2366 a.start = &buf; 2367 a.end = start + have; 2368 /* start copying other objects */ 2369 if (compat) { 2370 a.type = DN_COMPAT_PIPE; 2371 dn_ht_scan(V_dn_cfg.schedhash, copy_data_helper_compat, &a); 2372 a.type = DN_COMPAT_QUEUE; 2373 dn_ht_scan(V_dn_cfg.fshash, copy_data_helper_compat, &a); 2374 } else if (a.type == DN_FS) { 2375 dn_ht_scan(V_dn_cfg.fshash, copy_data_helper, &a); 2376 } else { 2377 dn_ht_scan(V_dn_cfg.schedhash, copy_data_helper, &a); 2378 } 2379 DN_BH_WUNLOCK(); 2380 2381 if (compat) { 2382 *compat = start; 2383 sopt->sopt_valsize = buf - start; 2384 /* free() is done by ip_dummynet_compat() */ 2385 start = NULL; //XXX hack 2386 } else { 2387 error = sooptcopyout(sopt, start, buf - start); 2388 } 2389 done: 2390 if (cmd != &r.o) 2391 free(cmd, M_DUMMYNET); 2392 free(start, M_DUMMYNET); 2393 return error; 2394 } 2395 2396 /* Callback called on scheduler instance to delete it if idle */ 2397 static int 2398 drain_scheduler_cb(void *_si, void *arg) 2399 { 2400 struct dn_sch_inst *si = _si; 2401 2402 if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL) 2403 return 0; 2404 2405 if (si->sched->fp->flags & DN_MULTIQUEUE) { 2406 if (si->q_count == 0) 2407 return si_destroy(si, NULL); 2408 else 2409 return 0; 2410 } else { /* !DN_MULTIQUEUE */ 2411 if ((si+1)->ni.length == 0) 2412 return si_destroy(si, NULL); 2413 else 2414 return 0; 2415 } 2416 return 0; /* unreachable */ 2417 } 2418 2419 /* Callback called on scheduler to check if it has instances */ 2420 static int 2421 drain_scheduler_sch_cb(void *_s, void *arg) 2422 { 2423 struct dn_schk *s = _s; 2424 2425 if (s->sch.flags & DN_HAVE_MASK) { 2426 dn_ht_scan_bucket(s->siht, &s->drain_bucket, 2427 drain_scheduler_cb, NULL); 2428 s->drain_bucket++; 2429 } else { 2430 if (s->siht) { 2431 if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL) 2432 s->siht = NULL; 2433 } 2434 } 2435 return 0; 2436 } 2437 2438 /* Called every tick, try to delete a 'bucket' of scheduler */ 2439 void 2440 dn_drain_scheduler(void) 2441 { 2442 dn_ht_scan_bucket(V_dn_cfg.schedhash, &V_dn_cfg.drain_sch, 2443 drain_scheduler_sch_cb, NULL); 2444 V_dn_cfg.drain_sch++; 2445 } 2446 2447 /* Callback called on queue to delete if it is idle */ 2448 static int 2449 drain_queue_cb(void *_q, void *arg) 2450 { 2451 struct dn_queue *q = _q; 2452 2453 if (q->ni.length == 0) { 2454 dn_delete_queue(q, DN_DESTROY); 2455 return DNHT_SCAN_DEL; /* queue is deleted */ 2456 } 2457 2458 return 0; /* queue isn't deleted */ 2459 } 2460 2461 /* Callback called on flowset used to check if it has queues */ 2462 static int 2463 drain_queue_fs_cb(void *_fs, void *arg) 2464 { 2465 struct dn_fsk *fs = _fs; 2466 2467 if (fs->fs.flags & DN_QHT_HASH) { 2468 /* Flowset has a hash table for queues */ 2469 dn_ht_scan_bucket(fs->qht, &fs->drain_bucket, 2470 drain_queue_cb, NULL); 2471 fs->drain_bucket++; 2472 } else { 2473 /* No hash table for this flowset, null the pointer 2474 * if the queue is deleted 2475 */ 2476 if (fs->qht) { 2477 if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL) 2478 fs->qht = NULL; 2479 } 2480 } 2481 return 0; 2482 } 2483 2484 /* Called every tick, try to delete a 'bucket' of queue */ 2485 void 2486 dn_drain_queue(void) 2487 { 2488 /* scan a bucket of flowset */ 2489 dn_ht_scan_bucket(V_dn_cfg.fshash, &V_dn_cfg.drain_fs, 2490 drain_queue_fs_cb, NULL); 2491 V_dn_cfg.drain_fs++; 2492 } 2493 2494 /* 2495 * Handler for the various dummynet socket options 2496 */ 2497 static int 2498 ip_dn_ctl(struct sockopt *sopt) 2499 { 2500 struct epoch_tracker et; 2501 void *p = NULL; 2502 size_t l; 2503 int error; 2504 2505 error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET); 2506 if (error) 2507 return (error); 2508 2509 /* Disallow sets in really-really secure mode. */ 2510 if (sopt->sopt_dir == SOPT_SET) { 2511 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); 2512 if (error) 2513 return (error); 2514 } 2515 2516 NET_EPOCH_ENTER(et); 2517 2518 switch (sopt->sopt_name) { 2519 default : 2520 D("dummynet: unknown option %d", sopt->sopt_name); 2521 error = EINVAL; 2522 break; 2523 2524 case IP_DUMMYNET_FLUSH: 2525 case IP_DUMMYNET_CONFIGURE: 2526 case IP_DUMMYNET_DEL: /* remove a pipe or queue */ 2527 case IP_DUMMYNET_GET: 2528 D("dummynet: compat option %d", sopt->sopt_name); 2529 error = ip_dummynet_compat(sopt); 2530 break; 2531 2532 case IP_DUMMYNET3: 2533 if (sopt->sopt_dir == SOPT_GET) { 2534 error = dummynet_get(sopt, NULL); 2535 break; 2536 } 2537 l = sopt->sopt_valsize; 2538 if (l < sizeof(struct dn_id) || l > 12000) { 2539 D("argument len %zu invalid", l); 2540 break; 2541 } 2542 p = malloc(l, M_TEMP, M_NOWAIT); 2543 if (p == NULL) { 2544 error = ENOMEM; 2545 break; 2546 } 2547 error = sooptcopyin(sopt, p, l, l); 2548 if (error == 0) 2549 error = do_config(p, l); 2550 break; 2551 } 2552 2553 free(p, M_TEMP); 2554 2555 NET_EPOCH_EXIT(et); 2556 2557 return error ; 2558 } 2559 2560 static void 2561 ip_dn_vnet_init(void) 2562 { 2563 if (V_dn_cfg.init_done) 2564 return; 2565 2566 /* Set defaults here. MSVC does not accept initializers, 2567 * and this is also useful for vimages 2568 */ 2569 /* queue limits */ 2570 V_dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */ 2571 V_dn_cfg.byte_limit = 1024 * 1024; 2572 V_dn_cfg.expire = 1; 2573 2574 /* RED parameters */ 2575 V_dn_cfg.red_lookup_depth = 256; /* default lookup table depth */ 2576 V_dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */ 2577 V_dn_cfg.red_max_pkt_size = 1500; /* default max packet size */ 2578 2579 /* hash tables */ 2580 V_dn_cfg.max_hash_size = 65536; /* max in the hash tables */ 2581 V_dn_cfg.hash_size = 64; /* default hash size */ 2582 2583 /* create hash tables for schedulers and flowsets. 2584 * In both we search by key and by pointer. 2585 */ 2586 V_dn_cfg.schedhash = dn_ht_init(NULL, V_dn_cfg.hash_size, 2587 offsetof(struct dn_schk, schk_next), 2588 schk_hash, schk_match, schk_new); 2589 V_dn_cfg.fshash = dn_ht_init(NULL, V_dn_cfg.hash_size, 2590 offsetof(struct dn_fsk, fsk_next), 2591 fsk_hash, fsk_match, fsk_new); 2592 2593 /* bucket index to drain object */ 2594 V_dn_cfg.drain_fs = 0; 2595 V_dn_cfg.drain_sch = 0; 2596 2597 heap_init(&V_dn_cfg.evheap, 16, offsetof(struct dn_id, id)); 2598 SLIST_INIT(&V_dn_cfg.fsu); 2599 2600 DN_LOCK_INIT(); 2601 2602 /* Initialize curr_time adjustment mechanics. */ 2603 getmicrouptime(&V_dn_cfg.prev_t); 2604 2605 V_dn_cfg.init_done = 1; 2606 } 2607 2608 static void 2609 ip_dn_vnet_destroy(void) 2610 { 2611 DN_BH_WLOCK(); 2612 dummynet_flush(); 2613 DN_BH_WUNLOCK(); 2614 2615 dn_ht_free(V_dn_cfg.schedhash, 0); 2616 dn_ht_free(V_dn_cfg.fshash, 0); 2617 heap_free(&V_dn_cfg.evheap); 2618 2619 DN_LOCK_DESTROY(); 2620 } 2621 2622 static void 2623 ip_dn_init(void) 2624 { 2625 if (dn_tasks_started) 2626 return; 2627 2628 mtx_init(&sched_mtx, "dn_sched", NULL, MTX_DEF); 2629 2630 dn_tasks_started = 1; 2631 TASK_INIT(&dn_task, 0, dummynet_task, NULL); 2632 dn_tq = taskqueue_create_fast("dummynet", M_WAITOK, 2633 taskqueue_thread_enqueue, &dn_tq); 2634 taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); 2635 2636 CK_LIST_INIT(&schedlist); 2637 callout_init(&dn_timeout, 1); 2638 dn_reschedule(); 2639 } 2640 2641 static void 2642 ip_dn_destroy(int last) 2643 { 2644 /* ensure no more callouts are started */ 2645 dn_gone = 1; 2646 2647 /* check for last */ 2648 if (last) { 2649 ND("removing last instance\n"); 2650 ip_dn_ctl_ptr = NULL; 2651 ip_dn_io_ptr = NULL; 2652 } 2653 2654 callout_drain(&dn_timeout); 2655 taskqueue_drain(dn_tq, &dn_task); 2656 taskqueue_free(dn_tq); 2657 } 2658 2659 static int 2660 dummynet_modevent(module_t mod, int type, void *data) 2661 { 2662 2663 if (type == MOD_LOAD) { 2664 if (ip_dn_io_ptr) { 2665 printf("DUMMYNET already loaded\n"); 2666 return EEXIST ; 2667 } 2668 ip_dn_init(); 2669 ip_dn_ctl_ptr = ip_dn_ctl; 2670 ip_dn_io_ptr = dummynet_io; 2671 return 0; 2672 } else if (type == MOD_UNLOAD) { 2673 ip_dn_destroy(1 /* last */); 2674 return 0; 2675 } else 2676 return EOPNOTSUPP; 2677 } 2678 2679 /* modevent helpers for the modules */ 2680 static int 2681 load_dn_sched(struct dn_alg *d) 2682 { 2683 struct dn_alg *s; 2684 2685 if (d == NULL) 2686 return 1; /* error */ 2687 ip_dn_init(); /* just in case, we need the lock */ 2688 2689 /* Check that mandatory funcs exists */ 2690 if (d->enqueue == NULL || d->dequeue == NULL) { 2691 D("missing enqueue or dequeue for %s", d->name); 2692 return 1; 2693 } 2694 2695 /* Search if scheduler already exists */ 2696 mtx_lock(&sched_mtx); 2697 CK_LIST_FOREACH(s, &schedlist, next) { 2698 if (strcmp(s->name, d->name) == 0) { 2699 D("%s already loaded", d->name); 2700 break; /* scheduler already exists */ 2701 } 2702 } 2703 if (s == NULL) 2704 CK_LIST_INSERT_HEAD(&schedlist, d, next); 2705 mtx_unlock(&sched_mtx); 2706 D("dn_sched %s %sloaded", d->name, s ? "not ":""); 2707 return s ? 1 : 0; 2708 } 2709 2710 static int 2711 unload_dn_sched(struct dn_alg *s) 2712 { 2713 struct dn_alg *tmp, *r; 2714 int err = EINVAL; 2715 2716 ND("called for %s", s->name); 2717 2718 mtx_lock(&sched_mtx); 2719 CK_LIST_FOREACH_SAFE(r, &schedlist, next, tmp) { 2720 if (strcmp(s->name, r->name) != 0) 2721 continue; 2722 ND("ref_count = %d", r->ref_count); 2723 err = (r->ref_count != 0) ? EBUSY : 0; 2724 if (err == 0) 2725 CK_LIST_REMOVE(r, next); 2726 break; 2727 } 2728 mtx_unlock(&sched_mtx); 2729 NET_EPOCH_WAIT(); 2730 D("dn_sched %s %sunloaded", s->name, err ? "not ":""); 2731 return err; 2732 } 2733 2734 int 2735 dn_sched_modevent(module_t mod, int cmd, void *arg) 2736 { 2737 struct dn_alg *sch = arg; 2738 2739 if (cmd == MOD_LOAD) 2740 return load_dn_sched(sch); 2741 else if (cmd == MOD_UNLOAD) 2742 return unload_dn_sched(sch); 2743 else 2744 return EINVAL; 2745 } 2746 2747 static moduledata_t dummynet_mod = { 2748 "dummynet", dummynet_modevent, NULL 2749 }; 2750 2751 #define DN_SI_SUB SI_SUB_PROTO_FIREWALL 2752 #define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ 2753 DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); 2754 MODULE_VERSION(dummynet, 3); 2755 2756 /* 2757 * Starting up. Done in order after dummynet_modevent() has been called. 2758 * VNET_SYSINIT is also called for each existing vnet and each new vnet. 2759 */ 2760 VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_vnet_init, NULL); 2761 2762 /* 2763 * Shutdown handlers up shop. These are done in REVERSE ORDER, but still 2764 * after dummynet_modevent() has been called. Not called on reboot. 2765 * VNET_SYSUNINIT is also called for each exiting vnet as it exits. 2766 * or when the module is unloaded. 2767 */ 2768 VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_vnet_destroy, NULL); 2769 2770 #ifdef NEW_AQM 2771 2772 /* modevent helpers for the AQM modules */ 2773 static int 2774 load_dn_aqm(struct dn_aqm *d) 2775 { 2776 struct dn_aqm *aqm=NULL; 2777 2778 if (d == NULL) 2779 return 1; /* error */ 2780 ip_dn_init(); /* just in case, we need the lock */ 2781 2782 /* Check that mandatory funcs exists */ 2783 if (d->enqueue == NULL || d->dequeue == NULL) { 2784 D("missing enqueue or dequeue for %s", d->name); 2785 return 1; 2786 } 2787 2788 mtx_lock(&sched_mtx); 2789 2790 /* Search if AQM already exists */ 2791 CK_LIST_FOREACH(aqm, &aqmlist, next) { 2792 if (strcmp(aqm->name, d->name) == 0) { 2793 D("%s already loaded", d->name); 2794 break; /* AQM already exists */ 2795 } 2796 } 2797 if (aqm == NULL) 2798 CK_LIST_INSERT_HEAD(&aqmlist, d, next); 2799 2800 mtx_unlock(&sched_mtx); 2801 2802 D("dn_aqm %s %sloaded", d->name, aqm ? "not ":""); 2803 return aqm ? 1 : 0; 2804 } 2805 2806 /* Callback to clean up AQM status for queues connected to a flowset 2807 * and then deconfigure the flowset. 2808 * This function is called before an AQM module is unloaded 2809 */ 2810 static int 2811 fs_cleanup(void *_fs, void *arg) 2812 { 2813 struct dn_fsk *fs = _fs; 2814 uint32_t type = *(uint32_t *)arg; 2815 2816 if (fs->aqmfp && fs->aqmfp->type == type) 2817 aqm_cleanup_deconfig_fs(fs); 2818 2819 return 0; 2820 } 2821 2822 static int 2823 unload_dn_aqm(struct dn_aqm *aqm) 2824 { 2825 struct dn_aqm *tmp, *r; 2826 int err = EINVAL; 2827 err = 0; 2828 ND("called for %s", aqm->name); 2829 2830 /* clean up AQM status and deconfig flowset */ 2831 dn_ht_scan(V_dn_cfg.fshash, fs_cleanup, &aqm->type); 2832 2833 mtx_lock(&sched_mtx); 2834 2835 CK_LIST_FOREACH_SAFE(r, &aqmlist, next, tmp) { 2836 if (strcmp(aqm->name, r->name) != 0) 2837 continue; 2838 ND("ref_count = %d", r->ref_count); 2839 err = (r->ref_count != 0 || r->cfg_ref_count != 0) ? EBUSY : 0; 2840 if (err == 0) 2841 CK_LIST_REMOVE(r, next); 2842 break; 2843 } 2844 2845 mtx_unlock(&sched_mtx); 2846 NET_EPOCH_WAIT(); 2847 2848 D("%s %sunloaded", aqm->name, err ? "not ":""); 2849 if (err) 2850 D("ref_count=%d, cfg_ref_count=%d", r->ref_count, r->cfg_ref_count); 2851 return err; 2852 } 2853 2854 int 2855 dn_aqm_modevent(module_t mod, int cmd, void *arg) 2856 { 2857 struct dn_aqm *aqm = arg; 2858 2859 if (cmd == MOD_LOAD) 2860 return load_dn_aqm(aqm); 2861 else if (cmd == MOD_UNLOAD) 2862 return unload_dn_aqm(aqm); 2863 else 2864 return EINVAL; 2865 } 2866 #endif 2867 2868 /* end of file */ 2869