1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Codel/FQ_Codel and PIE/FQ-PIE Code: 5 * Copyright (C) 2016 Centre for Advanced Internet Architectures, 6 * Swinburne University of Technology, Melbourne, Australia. 7 * Portions of this code were made possible in part by a gift from 8 * The Comcast Innovation Fund. 9 * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> 10 * 11 * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa 12 * Portions Copyright (c) 2000 Akamba Corp. 13 * All rights reserved 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 /* 41 * Configuration and internal object management for dummynet. 42 */ 43 44 #include "opt_inet6.h" 45 46 #include <sys/param.h> 47 #include <sys/ck.h> 48 #include <sys/systm.h> 49 #include <sys/malloc.h> 50 #include <sys/mbuf.h> 51 #include <sys/kernel.h> 52 #include <sys/lock.h> 53 #include <sys/module.h> 54 #include <sys/mutex.h> 55 #include <sys/priv.h> 56 #include <sys/proc.h> 57 #include <sys/rwlock.h> 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/time.h> 61 #include <sys/taskqueue.h> 62 #include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ 63 #include <netinet/in.h> 64 #include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ 65 #include <netinet/ip_fw.h> 66 #include <netinet/ip_dummynet.h> 67 #include <net/vnet.h> 68 69 #include <netpfil/ipfw/ip_fw_private.h> 70 #include <netpfil/ipfw/dn_heap.h> 71 #include <netpfil/ipfw/ip_dn_private.h> 72 #ifdef NEW_AQM 73 #include <netpfil/ipfw/dn_aqm.h> 74 #endif 75 #include <netpfil/ipfw/dn_sched.h> 76 77 /* which objects to copy */ 78 #define DN_C_LINK 0x01 79 #define DN_C_SCH 0x02 80 #define DN_C_FLOW 0x04 81 #define DN_C_FS 0x08 82 #define DN_C_QUEUE 0x10 83 84 /* we use this argument in case of a schk_new */ 85 struct schk_new_arg { 86 struct dn_alg *fp; 87 struct dn_sch *sch; 88 }; 89 90 /*---- callout hooks. ----*/ 91 static struct callout dn_timeout; 92 static int dn_tasks_started = 0; 93 static int dn_gone; 94 static struct task dn_task; 95 static struct taskqueue *dn_tq = NULL; 96 97 /* global scheduler list */ 98 struct mtx sched_mtx; 99 CK_LIST_HEAD(, dn_alg) schedlist; 100 #ifdef NEW_AQM 101 CK_LIST_HEAD(, dn_aqm) aqmlist; /* list of AQMs */ 102 #endif 103 104 static void 105 dummynet(void *arg) 106 { 107 108 (void)arg; /* UNUSED */ 109 taskqueue_enqueue(dn_tq, &dn_task); 110 } 111 112 void 113 dn_reschedule(void) 114 { 115 116 if (dn_gone != 0) 117 return; 118 callout_reset_sbt(&dn_timeout, tick_sbt, 0, dummynet, NULL, 119 C_HARDCLOCK | C_DIRECT_EXEC); 120 } 121 /*----- end of callout hooks -----*/ 122 123 #ifdef NEW_AQM 124 /* Return AQM descriptor for given type or name. */ 125 static struct dn_aqm * 126 find_aqm_type(int type, char *name) 127 { 128 struct dn_aqm *d; 129 130 NET_EPOCH_ASSERT(); 131 132 CK_LIST_FOREACH(d, &aqmlist, next) { 133 if (d->type == type || (name && !strcasecmp(d->name, name))) 134 return d; 135 } 136 return NULL; /* not found */ 137 } 138 #endif 139 140 /* Return a scheduler descriptor given the type or name. */ 141 static struct dn_alg * 142 find_sched_type(int type, char *name) 143 { 144 struct dn_alg *d; 145 146 NET_EPOCH_ASSERT(); 147 148 CK_LIST_FOREACH(d, &schedlist, next) { 149 if (d->type == type || (name && !strcasecmp(d->name, name))) 150 return d; 151 } 152 return NULL; /* not found */ 153 } 154 155 int 156 ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) 157 { 158 int oldv = *v; 159 const char *op = NULL; 160 if (dflt < lo) 161 dflt = lo; 162 if (dflt > hi) 163 dflt = hi; 164 if (oldv < lo) { 165 *v = dflt; 166 op = "Bump"; 167 } else if (oldv > hi) { 168 *v = hi; 169 op = "Clamp"; 170 } else 171 return *v; 172 if (op && msg) 173 printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); 174 return *v; 175 } 176 177 /*---- flow_id mask, hash and compare functions ---*/ 178 /* 179 * The flow_id includes the 5-tuple, the queue/pipe number 180 * which we store in the extra area in host order, 181 * and for ipv6 also the flow_id6. 182 * XXX see if we want the tos byte (can store in 'flags') 183 */ 184 static struct ipfw_flow_id * 185 flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) 186 { 187 int is_v6 = IS_IP6_FLOW_ID(id); 188 189 id->dst_port &= mask->dst_port; 190 id->src_port &= mask->src_port; 191 id->proto &= mask->proto; 192 id->extra &= mask->extra; 193 if (is_v6) { 194 APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); 195 APPLY_MASK(&id->src_ip6, &mask->src_ip6); 196 id->flow_id6 &= mask->flow_id6; 197 } else { 198 id->dst_ip &= mask->dst_ip; 199 id->src_ip &= mask->src_ip; 200 } 201 return id; 202 } 203 204 /* computes an OR of two masks, result in dst and also returned */ 205 static struct ipfw_flow_id * 206 flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst) 207 { 208 int is_v6 = IS_IP6_FLOW_ID(dst); 209 210 dst->dst_port |= src->dst_port; 211 dst->src_port |= src->src_port; 212 dst->proto |= src->proto; 213 dst->extra |= src->extra; 214 if (is_v6) { 215 #define OR_MASK(_d, _s) \ 216 (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \ 217 (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \ 218 (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \ 219 (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3]; 220 OR_MASK(&dst->dst_ip6, &src->dst_ip6); 221 OR_MASK(&dst->src_ip6, &src->src_ip6); 222 #undef OR_MASK 223 dst->flow_id6 |= src->flow_id6; 224 } else { 225 dst->dst_ip |= src->dst_ip; 226 dst->src_ip |= src->src_ip; 227 } 228 return dst; 229 } 230 231 static int 232 nonzero_mask(struct ipfw_flow_id *m) 233 { 234 if (m->dst_port || m->src_port || m->proto || m->extra) 235 return 1; 236 if (IS_IP6_FLOW_ID(m)) { 237 return 238 m->dst_ip6.__u6_addr.__u6_addr32[0] || 239 m->dst_ip6.__u6_addr.__u6_addr32[1] || 240 m->dst_ip6.__u6_addr.__u6_addr32[2] || 241 m->dst_ip6.__u6_addr.__u6_addr32[3] || 242 m->src_ip6.__u6_addr.__u6_addr32[0] || 243 m->src_ip6.__u6_addr.__u6_addr32[1] || 244 m->src_ip6.__u6_addr.__u6_addr32[2] || 245 m->src_ip6.__u6_addr.__u6_addr32[3] || 246 m->flow_id6; 247 } else { 248 return m->dst_ip || m->src_ip; 249 } 250 } 251 252 /* XXX we may want a better hash function */ 253 static uint32_t 254 flow_id_hash(struct ipfw_flow_id *id) 255 { 256 uint32_t i; 257 258 if (IS_IP6_FLOW_ID(id)) { 259 uint32_t *d = (uint32_t *)&id->dst_ip6; 260 uint32_t *s = (uint32_t *)&id->src_ip6; 261 i = (d[0] ) ^ (d[1]) ^ 262 (d[2] ) ^ (d[3]) ^ 263 (d[0] >> 15) ^ (d[1] >> 15) ^ 264 (d[2] >> 15) ^ (d[3] >> 15) ^ 265 (s[0] << 1) ^ (s[1] << 1) ^ 266 (s[2] << 1) ^ (s[3] << 1) ^ 267 (s[0] << 16) ^ (s[1] << 16) ^ 268 (s[2] << 16) ^ (s[3] << 16) ^ 269 (id->dst_port << 1) ^ (id->src_port) ^ 270 (id->extra) ^ 271 (id->proto ) ^ (id->flow_id6); 272 } else { 273 i = (id->dst_ip) ^ (id->dst_ip >> 15) ^ 274 (id->src_ip << 1) ^ (id->src_ip >> 16) ^ 275 (id->extra) ^ 276 (id->dst_port << 1) ^ (id->src_port) ^ (id->proto); 277 } 278 return i; 279 } 280 281 /* Like bcmp, returns 0 if ids match, 1 otherwise. */ 282 static int 283 flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2) 284 { 285 int is_v6 = IS_IP6_FLOW_ID(id1); 286 287 if (!is_v6) { 288 if (IS_IP6_FLOW_ID(id2)) 289 return 1; /* different address families */ 290 291 return (id1->dst_ip == id2->dst_ip && 292 id1->src_ip == id2->src_ip && 293 id1->dst_port == id2->dst_port && 294 id1->src_port == id2->src_port && 295 id1->proto == id2->proto && 296 id1->extra == id2->extra) ? 0 : 1; 297 } 298 /* the ipv6 case */ 299 return ( 300 !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) && 301 !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) && 302 id1->dst_port == id2->dst_port && 303 id1->src_port == id2->src_port && 304 id1->proto == id2->proto && 305 id1->extra == id2->extra && 306 id1->flow_id6 == id2->flow_id6) ? 0 : 1; 307 } 308 /*--------- end of flow-id mask, hash and compare ---------*/ 309 310 /*--- support functions for the qht hashtable ---- 311 * Entries are hashed by flow-id 312 */ 313 static uint32_t 314 q_hash(uintptr_t key, int flags, void *arg) 315 { 316 /* compute the hash slot from the flow id */ 317 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 318 &((struct dn_queue *)key)->ni.fid : 319 (struct ipfw_flow_id *)key; 320 321 return flow_id_hash(id); 322 } 323 324 static int 325 q_match(void *obj, uintptr_t key, int flags, void *arg) 326 { 327 struct dn_queue *o = (struct dn_queue *)obj; 328 struct ipfw_flow_id *id2; 329 330 if (flags & DNHT_KEY_IS_OBJ) { 331 /* compare pointers */ 332 id2 = &((struct dn_queue *)key)->ni.fid; 333 } else { 334 id2 = (struct ipfw_flow_id *)key; 335 } 336 return (0 == flow_id_cmp(&o->ni.fid, id2)); 337 } 338 339 /* 340 * create a new queue instance for the given 'key'. 341 */ 342 static void * 343 q_new(uintptr_t key, int flags, void *arg) 344 { 345 struct dn_queue *q, *template = arg; 346 struct dn_fsk *fs = template->fs; 347 int size = sizeof(*q) + fs->sched->fp->q_datalen; 348 349 q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO); 350 if (q == NULL) { 351 D("no memory for new queue"); 352 return NULL; 353 } 354 355 set_oid(&q->ni.oid, DN_QUEUE, size); 356 if (fs->fs.flags & DN_QHT_HASH) 357 q->ni.fid = *(struct ipfw_flow_id *)key; 358 q->fs = fs; 359 q->_si = template->_si; 360 q->_si->q_count++; 361 362 if (fs->sched->fp->new_queue) 363 fs->sched->fp->new_queue(q); 364 365 #ifdef NEW_AQM 366 /* call AQM init function after creating a queue*/ 367 if (fs->aqmfp && fs->aqmfp->init) 368 if(fs->aqmfp->init(q)) 369 D("unable to init AQM for fs %d", fs->fs.fs_nr); 370 #endif 371 V_dn_cfg.queue_count++; 372 373 return q; 374 } 375 376 /* 377 * Notify schedulers that a queue is going away. 378 * If (flags & DN_DESTROY), also free the packets. 379 * The version for callbacks is called q_delete_cb(). 380 */ 381 static void 382 dn_delete_queue(struct dn_queue *q, int flags) 383 { 384 struct dn_fsk *fs = q->fs; 385 386 #ifdef NEW_AQM 387 /* clean up AQM status for queue 'q' 388 * cleanup here is called just with MULTIQUEUE 389 */ 390 if (fs && fs->aqmfp && fs->aqmfp->cleanup) 391 fs->aqmfp->cleanup(q); 392 #endif 393 // D("fs %p si %p\n", fs, q->_si); 394 /* notify the parent scheduler that the queue is going away */ 395 if (fs && fs->sched->fp->free_queue) 396 fs->sched->fp->free_queue(q); 397 q->_si->q_count--; 398 q->_si = NULL; 399 if (flags & DN_DESTROY) { 400 if (q->mq.head) 401 dn_free_pkts(q->mq.head); 402 bzero(q, sizeof(*q)); // safety 403 free(q, M_DUMMYNET); 404 V_dn_cfg.queue_count--; 405 } 406 } 407 408 static int 409 q_delete_cb(void *q, void *arg) 410 { 411 int flags = (int)(uintptr_t)arg; 412 dn_delete_queue(q, flags); 413 return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0; 414 } 415 416 /* 417 * calls dn_delete_queue/q_delete_cb on all queues, 418 * which notifies the parent scheduler and possibly drains packets. 419 * flags & DN_DESTROY: drains queues and destroy qht; 420 */ 421 static void 422 qht_delete(struct dn_fsk *fs, int flags) 423 { 424 ND("fs %d start flags %d qht %p", 425 fs->fs.fs_nr, flags, fs->qht); 426 if (!fs->qht) 427 return; 428 if (fs->fs.flags & DN_QHT_HASH) { 429 dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags); 430 if (flags & DN_DESTROY) { 431 dn_ht_free(fs->qht, 0); 432 fs->qht = NULL; 433 } 434 } else { 435 dn_delete_queue((struct dn_queue *)(fs->qht), flags); 436 if (flags & DN_DESTROY) 437 fs->qht = NULL; 438 } 439 } 440 441 /* 442 * Find and possibly create the queue for a MULTIQUEUE scheduler. 443 * We never call it for !MULTIQUEUE (the queue is in the sch_inst). 444 */ 445 struct dn_queue * 446 ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si, 447 struct ipfw_flow_id *id) 448 { 449 struct dn_queue template; 450 451 template._si = si; 452 template.fs = fs; 453 454 if (fs->fs.flags & DN_QHT_HASH) { 455 struct ipfw_flow_id masked_id; 456 if (fs->qht == NULL) { 457 fs->qht = dn_ht_init(NULL, fs->fs.buckets, 458 offsetof(struct dn_queue, q_next), 459 q_hash, q_match, q_new); 460 if (fs->qht == NULL) 461 return NULL; 462 } 463 masked_id = *id; 464 flow_id_mask(&fs->fsk_mask, &masked_id); 465 return dn_ht_find(fs->qht, (uintptr_t)&masked_id, 466 DNHT_INSERT, &template); 467 } else { 468 if (fs->qht == NULL) 469 fs->qht = q_new(0, 0, &template); 470 return (struct dn_queue *)fs->qht; 471 } 472 } 473 /*--- end of queue hash table ---*/ 474 475 /*--- support functions for the sch_inst hashtable ---- 476 * 477 * These are hashed by flow-id 478 */ 479 static uint32_t 480 si_hash(uintptr_t key, int flags, void *arg) 481 { 482 /* compute the hash slot from the flow id */ 483 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 484 &((struct dn_sch_inst *)key)->ni.fid : 485 (struct ipfw_flow_id *)key; 486 487 return flow_id_hash(id); 488 } 489 490 static int 491 si_match(void *obj, uintptr_t key, int flags, void *arg) 492 { 493 struct dn_sch_inst *o = obj; 494 struct ipfw_flow_id *id2; 495 496 id2 = (flags & DNHT_KEY_IS_OBJ) ? 497 &((struct dn_sch_inst *)key)->ni.fid : 498 (struct ipfw_flow_id *)key; 499 return flow_id_cmp(&o->ni.fid, id2) == 0; 500 } 501 502 /* 503 * create a new instance for the given 'key' 504 * Allocate memory for instance, delay line and scheduler private data. 505 */ 506 static void * 507 si_new(uintptr_t key, int flags, void *arg) 508 { 509 struct dn_schk *s = arg; 510 struct dn_sch_inst *si; 511 int l = sizeof(*si) + s->fp->si_datalen; 512 513 si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); 514 if (si == NULL) 515 goto error; 516 517 /* Set length only for the part passed up to userland. */ 518 set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow)); 519 set_oid(&(si->dline.oid), DN_DELAY_LINE, 520 sizeof(struct delay_line)); 521 /* mark si and dline as outside the event queue */ 522 si->ni.oid.id = si->dline.oid.id = -1; 523 524 si->sched = s; 525 si->dline.si = si; 526 527 if (s->fp->new_sched && s->fp->new_sched(si)) { 528 D("new_sched error"); 529 goto error; 530 } 531 if (s->sch.flags & DN_HAVE_MASK) 532 si->ni.fid = *(struct ipfw_flow_id *)key; 533 534 #ifdef NEW_AQM 535 /* init AQM status for !DN_MULTIQUEUE sched*/ 536 if (!(s->fp->flags & DN_MULTIQUEUE)) 537 if (s->fs->aqmfp && s->fs->aqmfp->init) 538 if(s->fs->aqmfp->init((struct dn_queue *)(si + 1))) { 539 D("unable to init AQM for fs %d", s->fs->fs.fs_nr); 540 goto error; 541 } 542 #endif 543 544 V_dn_cfg.si_count++; 545 return si; 546 547 error: 548 if (si) { 549 bzero(si, sizeof(*si)); // safety 550 free(si, M_DUMMYNET); 551 } 552 return NULL; 553 } 554 555 /* 556 * Callback from siht to delete all scheduler instances. Remove 557 * si and delay line from the system heap, destroy all queues. 558 * We assume that all flowset have been notified and do not 559 * point to us anymore. 560 */ 561 static int 562 si_destroy(void *_si, void *arg) 563 { 564 struct dn_sch_inst *si = _si; 565 struct dn_schk *s = si->sched; 566 struct delay_line *dl = &si->dline; 567 568 if (dl->oid.subtype) /* remove delay line from event heap */ 569 heap_extract(&V_dn_cfg.evheap, dl); 570 dn_free_pkts(dl->mq.head); /* drain delay line */ 571 if (si->kflags & DN_ACTIVE) /* remove si from event heap */ 572 heap_extract(&V_dn_cfg.evheap, si); 573 574 #ifdef NEW_AQM 575 /* clean up AQM status for !DN_MULTIQUEUE sched 576 * Note that all queues belong to fs were cleaned up in fsk_detach. 577 * When drain_scheduler is called s->fs and q->fs are pointing 578 * to a correct fs, so we can use fs in this case. 579 */ 580 if (!(s->fp->flags & DN_MULTIQUEUE)) { 581 struct dn_queue *q = (struct dn_queue *)(si + 1); 582 if (q->aqm_status && q->fs->aqmfp) 583 if (q->fs->aqmfp->cleanup) 584 q->fs->aqmfp->cleanup(q); 585 } 586 #endif 587 if (s->fp->free_sched) 588 s->fp->free_sched(si); 589 bzero(si, sizeof(*si)); /* safety */ 590 free(si, M_DUMMYNET); 591 V_dn_cfg.si_count--; 592 return DNHT_SCAN_DEL; 593 } 594 595 /* 596 * Find the scheduler instance for this packet. If we need to apply 597 * a mask, do on a local copy of the flow_id to preserve the original. 598 * Assume siht is always initialized if we have a mask. 599 */ 600 struct dn_sch_inst * 601 ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id) 602 { 603 604 if (s->sch.flags & DN_HAVE_MASK) { 605 struct ipfw_flow_id id_t = *id; 606 flow_id_mask(&s->sch.sched_mask, &id_t); 607 return dn_ht_find(s->siht, (uintptr_t)&id_t, 608 DNHT_INSERT, s); 609 } 610 if (!s->siht) 611 s->siht = si_new(0, 0, s); 612 return (struct dn_sch_inst *)s->siht; 613 } 614 615 /* callback to flush credit for the scheduler instance */ 616 static int 617 si_reset_credit(void *_si, void *arg) 618 { 619 struct dn_sch_inst *si = _si; 620 struct dn_link *p = &si->sched->link; 621 622 si->credit = p->burst + (V_dn_cfg.io_fast ? p->bandwidth : 0); 623 return 0; 624 } 625 626 static void 627 schk_reset_credit(struct dn_schk *s) 628 { 629 if (s->sch.flags & DN_HAVE_MASK) 630 dn_ht_scan(s->siht, si_reset_credit, NULL); 631 else if (s->siht) 632 si_reset_credit(s->siht, NULL); 633 } 634 /*---- end of sch_inst hashtable ---------------------*/ 635 636 /*------------------------------------------------------- 637 * flowset hash (fshash) support. Entries are hashed by fs_nr. 638 * New allocations are put in the fsunlinked list, from which 639 * they are removed when they point to a specific scheduler. 640 */ 641 static uint32_t 642 fsk_hash(uintptr_t key, int flags, void *arg) 643 { 644 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 645 ((struct dn_fsk *)key)->fs.fs_nr; 646 647 return ( (i>>8)^(i>>4)^i ); 648 } 649 650 static int 651 fsk_match(void *obj, uintptr_t key, int flags, void *arg) 652 { 653 struct dn_fsk *fs = obj; 654 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 655 ((struct dn_fsk *)key)->fs.fs_nr; 656 657 return (fs->fs.fs_nr == i); 658 } 659 660 static void * 661 fsk_new(uintptr_t key, int flags, void *arg) 662 { 663 struct dn_fsk *fs; 664 665 fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO); 666 if (fs) { 667 set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs)); 668 V_dn_cfg.fsk_count++; 669 fs->drain_bucket = 0; 670 SLIST_INSERT_HEAD(&V_dn_cfg.fsu, fs, sch_chain); 671 } 672 return fs; 673 } 674 675 #ifdef NEW_AQM 676 /* callback function for cleaning up AQM queue status belongs to a flowset 677 * connected to scheduler instance '_si' (for !DN_MULTIQUEUE only). 678 */ 679 static int 680 si_cleanup_q(void *_si, void *arg) 681 { 682 struct dn_sch_inst *si = _si; 683 684 if (!(si->sched->fp->flags & DN_MULTIQUEUE)) { 685 if (si->sched->fs->aqmfp && si->sched->fs->aqmfp->cleanup) 686 si->sched->fs->aqmfp->cleanup((struct dn_queue *) (si+1)); 687 } 688 return 0; 689 } 690 691 /* callback to clean up queue AQM status.*/ 692 static int 693 q_cleanup_q(void *_q, void *arg) 694 { 695 struct dn_queue *q = _q; 696 q->fs->aqmfp->cleanup(q); 697 return 0; 698 } 699 700 /* Clean up all AQM queues status belongs to flowset 'fs' and then 701 * deconfig AQM for flowset 'fs' 702 */ 703 static void 704 aqm_cleanup_deconfig_fs(struct dn_fsk *fs) 705 { 706 struct dn_sch_inst *si; 707 708 /* clean up AQM status for all queues for !DN_MULTIQUEUE sched*/ 709 if (fs->fs.fs_nr > DN_MAX_ID) { 710 if (fs->sched && !(fs->sched->fp->flags & DN_MULTIQUEUE)) { 711 if (fs->sched->sch.flags & DN_HAVE_MASK) 712 dn_ht_scan(fs->sched->siht, si_cleanup_q, NULL); 713 else { 714 /* single si i.e. no sched mask */ 715 si = (struct dn_sch_inst *) fs->sched->siht; 716 if (si && fs->aqmfp && fs->aqmfp->cleanup) 717 fs->aqmfp->cleanup((struct dn_queue *) (si+1)); 718 } 719 } 720 } 721 722 /* clean up AQM status for all queues for DN_MULTIQUEUE sched*/ 723 if (fs->sched && fs->sched->fp->flags & DN_MULTIQUEUE && fs->qht) { 724 if (fs->fs.flags & DN_QHT_HASH) 725 dn_ht_scan(fs->qht, q_cleanup_q, NULL); 726 else 727 fs->aqmfp->cleanup((struct dn_queue *)(fs->qht)); 728 } 729 730 /* deconfig AQM */ 731 if(fs->aqmcfg && fs->aqmfp && fs->aqmfp->deconfig) 732 fs->aqmfp->deconfig(fs); 733 } 734 #endif 735 736 /* 737 * detach flowset from its current scheduler. Flags as follows: 738 * DN_DETACH removes from the fsk_list 739 * DN_DESTROY deletes individual queues 740 * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked). 741 */ 742 static void 743 fsk_detach(struct dn_fsk *fs, int flags) 744 { 745 if (flags & DN_DELETE_FS) 746 flags |= DN_DESTROY; 747 ND("fs %d from sched %d flags %s %s %s", 748 fs->fs.fs_nr, fs->fs.sched_nr, 749 (flags & DN_DELETE_FS) ? "DEL_FS":"", 750 (flags & DN_DESTROY) ? "DEL":"", 751 (flags & DN_DETACH) ? "DET":""); 752 if (flags & DN_DETACH) { /* detach from the list */ 753 struct dn_fsk_head *h; 754 h = fs->sched ? &fs->sched->fsk_list : &V_dn_cfg.fsu; 755 SLIST_REMOVE(h, fs, dn_fsk, sch_chain); 756 } 757 /* Free the RED parameters, they will be recomputed on 758 * subsequent attach if needed. 759 */ 760 free(fs->w_q_lookup, M_DUMMYNET); 761 fs->w_q_lookup = NULL; 762 qht_delete(fs, flags); 763 #ifdef NEW_AQM 764 aqm_cleanup_deconfig_fs(fs); 765 #endif 766 767 if (fs->sched && fs->sched->fp->free_fsk) 768 fs->sched->fp->free_fsk(fs); 769 fs->sched = NULL; 770 if (flags & DN_DELETE_FS) { 771 bzero(fs, sizeof(*fs)); /* safety */ 772 free(fs, M_DUMMYNET); 773 V_dn_cfg.fsk_count--; 774 } else { 775 SLIST_INSERT_HEAD(&V_dn_cfg.fsu, fs, sch_chain); 776 } 777 } 778 779 /* 780 * Detach or destroy all flowsets in a list. 781 * flags specifies what to do: 782 * DN_DESTROY: flush all queues 783 * DN_DELETE_FS: DN_DESTROY + destroy flowset 784 * DN_DELETE_FS implies DN_DESTROY 785 */ 786 static void 787 fsk_detach_list(struct dn_fsk_head *h, int flags) 788 { 789 struct dn_fsk *fs; 790 int n = 0; /* only for stats */ 791 792 ND("head %p flags %x", h, flags); 793 while ((fs = SLIST_FIRST(h))) { 794 SLIST_REMOVE_HEAD(h, sch_chain); 795 n++; 796 fsk_detach(fs, flags); 797 } 798 ND("done %d flowsets", n); 799 } 800 801 /* 802 * called on 'queue X delete' -- removes the flowset from fshash, 803 * deletes all queues for the flowset, and removes the flowset. 804 */ 805 static int 806 delete_fs(int i, int locked) 807 { 808 struct dn_fsk *fs; 809 int err = 0; 810 811 if (!locked) 812 DN_BH_WLOCK(); 813 fs = dn_ht_find(V_dn_cfg.fshash, i, DNHT_REMOVE, NULL); 814 ND("fs %d found %p", i, fs); 815 if (fs) { 816 fsk_detach(fs, DN_DETACH | DN_DELETE_FS); 817 err = 0; 818 } else 819 err = EINVAL; 820 if (!locked) 821 DN_BH_WUNLOCK(); 822 return err; 823 } 824 825 /*----- end of flowset hashtable support -------------*/ 826 827 /*------------------------------------------------------------ 828 * Scheduler hash. When searching by index we pass sched_nr, 829 * otherwise we pass struct dn_sch * which is the first field in 830 * struct dn_schk so we can cast between the two. We use this trick 831 * because in the create phase (but it should be fixed). 832 */ 833 static uint32_t 834 schk_hash(uintptr_t key, int flags, void *_arg) 835 { 836 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 837 ((struct dn_schk *)key)->sch.sched_nr; 838 return ( (i>>8)^(i>>4)^i ); 839 } 840 841 static int 842 schk_match(void *obj, uintptr_t key, int flags, void *_arg) 843 { 844 struct dn_schk *s = (struct dn_schk *)obj; 845 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 846 ((struct dn_schk *)key)->sch.sched_nr; 847 return (s->sch.sched_nr == i); 848 } 849 850 /* 851 * Create the entry and intialize with the sched hash if needed. 852 * Leave s->fp unset so we can tell whether a dn_ht_find() returns 853 * a new object or a previously existing one. 854 */ 855 static void * 856 schk_new(uintptr_t key, int flags, void *arg) 857 { 858 struct schk_new_arg *a = arg; 859 struct dn_schk *s; 860 int l = sizeof(*s) +a->fp->schk_datalen; 861 862 s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); 863 if (s == NULL) 864 return NULL; 865 set_oid(&s->link.oid, DN_LINK, sizeof(s->link)); 866 s->sch = *a->sch; // copy initial values 867 s->link.link_nr = s->sch.sched_nr; 868 SLIST_INIT(&s->fsk_list); 869 /* initialize the hash table or create the single instance */ 870 s->fp = a->fp; /* si_new needs this */ 871 s->drain_bucket = 0; 872 if (s->sch.flags & DN_HAVE_MASK) { 873 s->siht = dn_ht_init(NULL, s->sch.buckets, 874 offsetof(struct dn_sch_inst, si_next), 875 si_hash, si_match, si_new); 876 if (s->siht == NULL) { 877 free(s, M_DUMMYNET); 878 return NULL; 879 } 880 } 881 s->fp = NULL; /* mark as a new scheduler */ 882 V_dn_cfg.schk_count++; 883 return s; 884 } 885 886 /* 887 * Callback for sched delete. Notify all attached flowsets to 888 * detach from the scheduler, destroy the internal flowset, and 889 * all instances. The scheduler goes away too. 890 * arg is 0 (only detach flowsets and destroy instances) 891 * DN_DESTROY (detach & delete queues, delete schk) 892 * or DN_DELETE_FS (delete queues and flowsets, delete schk) 893 */ 894 static int 895 schk_delete_cb(void *obj, void *arg) 896 { 897 struct dn_schk *s = obj; 898 #if 0 899 int a = (int)arg; 900 ND("sched %d arg %s%s", 901 s->sch.sched_nr, 902 a&DN_DESTROY ? "DEL ":"", 903 a&DN_DELETE_FS ? "DEL_FS":""); 904 #endif 905 fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0); 906 /* no more flowset pointing to us now */ 907 if (s->sch.flags & DN_HAVE_MASK) { 908 dn_ht_scan(s->siht, si_destroy, NULL); 909 dn_ht_free(s->siht, 0); 910 } else if (s->siht) 911 si_destroy(s->siht, NULL); 912 913 free(s->profile, M_DUMMYNET); 914 s->profile = NULL; 915 s->siht = NULL; 916 if (s->fp->destroy) 917 s->fp->destroy(s); 918 bzero(s, sizeof(*s)); // safety 919 free(obj, M_DUMMYNET); 920 V_dn_cfg.schk_count--; 921 return DNHT_SCAN_DEL; 922 } 923 924 /* 925 * called on a 'sched X delete' command. Deletes a single scheduler. 926 * This is done by removing from the schedhash, unlinking all 927 * flowsets and deleting their traffic. 928 */ 929 static int 930 delete_schk(int i) 931 { 932 struct dn_schk *s; 933 934 s = dn_ht_find(V_dn_cfg.schedhash, i, DNHT_REMOVE, NULL); 935 ND("%d %p", i, s); 936 if (!s) 937 return EINVAL; 938 delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */ 939 /* then detach flowsets, delete traffic */ 940 schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY); 941 return 0; 942 } 943 /*--- end of schk hashtable support ---*/ 944 945 static int 946 copy_obj(char **start, char *end, void *_o, const char *msg, int i) 947 { 948 struct dn_id o; 949 union { 950 struct dn_link l; 951 struct dn_schk s; 952 } dn; 953 int have = end - *start; 954 955 memcpy(&o, _o, sizeof(o)); 956 if (have < o.len || o.len == 0 || o.type == 0) { 957 D("(WARN) type %d %s %d have %d need %d", 958 o.type, msg, i, have, o.len); 959 return 1; 960 } 961 ND("type %d %s %d len %d", o.type, msg, i, o.len); 962 if (o.type == DN_LINK) { 963 memcpy(&dn.l, _o, sizeof(dn.l)); 964 /* Adjust burst parameter for link */ 965 dn.l.burst = div64(dn.l.burst, 8 * hz); 966 dn.l.delay = dn.l.delay * 1000 / hz; 967 memcpy(*start, &dn.l, sizeof(dn.l)); 968 } else if (o.type == DN_SCH) { 969 /* Set dn.s.sch.oid.id to the number of instances */ 970 memcpy(&dn.s, _o, sizeof(dn.s)); 971 dn.s.sch.oid.id = (dn.s.sch.flags & DN_HAVE_MASK) ? 972 dn_ht_entries(dn.s.siht) : (dn.s.siht ? 1 : 0); 973 memcpy(*start, &dn.s, sizeof(dn.s)); 974 } else 975 memcpy(*start, _o, o.len); 976 *start += o.len; 977 return 0; 978 } 979 980 /* Specific function to copy a queue. 981 * Copies only the user-visible part of a queue (which is in 982 * a struct dn_flow), and sets len accordingly. 983 */ 984 static int 985 copy_obj_q(char **start, char *end, void *_o, const char *msg, int i) 986 { 987 struct dn_id *o = _o; 988 int have = end - *start; 989 int len = sizeof(struct dn_flow); /* see above comment */ 990 991 if (have < len || o->len == 0 || o->type != DN_QUEUE) { 992 D("ERROR type %d %s %d have %d need %d", 993 o->type, msg, i, have, len); 994 return 1; 995 } 996 ND("type %d %s %d len %d", o->type, msg, i, len); 997 memcpy(*start, _o, len); 998 ((struct dn_id*)(*start))->len = len; 999 *start += len; 1000 return 0; 1001 } 1002 1003 static int 1004 copy_q_cb(void *obj, void *arg) 1005 { 1006 struct dn_queue *q = obj; 1007 struct copy_args *a = arg; 1008 struct dn_flow *ni = (struct dn_flow *)(*a->start); 1009 if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1)) 1010 return DNHT_SCAN_END; 1011 ni->oid.type = DN_FLOW; /* override the DN_QUEUE */ 1012 ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL); 1013 return 0; 1014 } 1015 1016 static int 1017 copy_q(struct copy_args *a, struct dn_fsk *fs, int flags) 1018 { 1019 if (!fs->qht) 1020 return 0; 1021 if (fs->fs.flags & DN_QHT_HASH) 1022 dn_ht_scan(fs->qht, copy_q_cb, a); 1023 else 1024 copy_q_cb(fs->qht, a); 1025 return 0; 1026 } 1027 1028 /* 1029 * This routine only copies the initial part of a profile ? XXX 1030 */ 1031 static int 1032 copy_profile(struct copy_args *a, struct dn_profile *p) 1033 { 1034 int have = a->end - *a->start; 1035 /* XXX here we check for max length */ 1036 int profile_len = sizeof(struct dn_profile) - 1037 ED_MAX_SAMPLES_NO*sizeof(int); 1038 1039 if (p == NULL) 1040 return 0; 1041 if (have < profile_len) { 1042 D("error have %d need %d", have, profile_len); 1043 return 1; 1044 } 1045 memcpy(*a->start, p, profile_len); 1046 ((struct dn_id *)(*a->start))->len = profile_len; 1047 *a->start += profile_len; 1048 return 0; 1049 } 1050 1051 static int 1052 copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags) 1053 { 1054 struct dn_fs *ufs = (struct dn_fs *)(*a->start); 1055 if (!fs) 1056 return 0; 1057 ND("flowset %d", fs->fs.fs_nr); 1058 if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr)) 1059 return DNHT_SCAN_END; 1060 ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ? 1061 dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0); 1062 if (flags) { /* copy queues */ 1063 copy_q(a, fs, 0); 1064 } 1065 return 0; 1066 } 1067 1068 static int 1069 copy_si_cb(void *obj, void *arg) 1070 { 1071 struct dn_sch_inst *si = obj; 1072 struct copy_args *a = arg; 1073 struct dn_flow *ni = (struct dn_flow *)(*a->start); 1074 if (copy_obj(a->start, a->end, &si->ni, "inst", 1075 si->sched->sch.sched_nr)) 1076 return DNHT_SCAN_END; 1077 ni->oid.type = DN_FLOW; /* override the DN_SCH_I */ 1078 ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL); 1079 return 0; 1080 } 1081 1082 static int 1083 copy_si(struct copy_args *a, struct dn_schk *s, int flags) 1084 { 1085 if (s->sch.flags & DN_HAVE_MASK) 1086 dn_ht_scan(s->siht, copy_si_cb, a); 1087 else if (s->siht) 1088 copy_si_cb(s->siht, a); 1089 return 0; 1090 } 1091 1092 /* 1093 * compute a list of children of a scheduler and copy up 1094 */ 1095 static int 1096 copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags) 1097 { 1098 struct dn_fsk *fs; 1099 struct dn_id *o; 1100 uint32_t *p; 1101 1102 int n = 0, space = sizeof(*o); 1103 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 1104 if (fs->fs.fs_nr < DN_MAX_ID) 1105 n++; 1106 } 1107 space += n * sizeof(uint32_t); 1108 DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n); 1109 if (a->end - *(a->start) < space) 1110 return DNHT_SCAN_END; 1111 o = (struct dn_id *)(*(a->start)); 1112 o->len = space; 1113 *a->start += o->len; 1114 o->type = DN_TEXT; 1115 p = (uint32_t *)(o+1); 1116 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) 1117 if (fs->fs.fs_nr < DN_MAX_ID) 1118 *p++ = fs->fs.fs_nr; 1119 return 0; 1120 } 1121 1122 static int 1123 copy_data_helper(void *_o, void *_arg) 1124 { 1125 struct copy_args *a = _arg; 1126 uint32_t *r = a->extra->r; /* start of first range */ 1127 uint32_t *lim; /* first invalid pointer */ 1128 int n; 1129 1130 lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len); 1131 1132 if (a->type == DN_LINK || a->type == DN_SCH) { 1133 /* pipe|sched show, we receive a dn_schk */ 1134 struct dn_schk *s = _o; 1135 1136 n = s->sch.sched_nr; 1137 if (a->type == DN_SCH && n >= DN_MAX_ID) 1138 return 0; /* not a scheduler */ 1139 if (a->type == DN_LINK && n <= DN_MAX_ID) 1140 return 0; /* not a pipe */ 1141 1142 /* see if the object is within one of our ranges */ 1143 for (;r < lim; r += 2) { 1144 if (n < r[0] || n > r[1]) 1145 continue; 1146 /* Found a valid entry, copy and we are done */ 1147 if (a->flags & DN_C_LINK) { 1148 if (copy_obj(a->start, a->end, 1149 &s->link, "link", n)) 1150 return DNHT_SCAN_END; 1151 if (copy_profile(a, s->profile)) 1152 return DNHT_SCAN_END; 1153 if (copy_flowset(a, s->fs, 0)) 1154 return DNHT_SCAN_END; 1155 } 1156 if (a->flags & DN_C_SCH) { 1157 if (copy_obj(a->start, a->end, 1158 &s->sch, "sched", n)) 1159 return DNHT_SCAN_END; 1160 /* list all attached flowsets */ 1161 if (copy_fsk_list(a, s, 0)) 1162 return DNHT_SCAN_END; 1163 } 1164 if (a->flags & DN_C_FLOW) 1165 copy_si(a, s, 0); 1166 break; 1167 } 1168 } else if (a->type == DN_FS) { 1169 /* queue show, skip internal flowsets */ 1170 struct dn_fsk *fs = _o; 1171 1172 n = fs->fs.fs_nr; 1173 if (n >= DN_MAX_ID) 1174 return 0; 1175 /* see if the object is within one of our ranges */ 1176 for (;r < lim; r += 2) { 1177 if (n < r[0] || n > r[1]) 1178 continue; 1179 if (copy_flowset(a, fs, 0)) 1180 return DNHT_SCAN_END; 1181 copy_q(a, fs, 0); 1182 break; /* we are done */ 1183 } 1184 } 1185 return 0; 1186 } 1187 1188 static inline struct dn_schk * 1189 locate_scheduler(int i) 1190 { 1191 return dn_ht_find(V_dn_cfg.schedhash, i, 0, NULL); 1192 } 1193 1194 /* 1195 * red parameters are in fixed point arithmetic. 1196 */ 1197 static int 1198 config_red(struct dn_fsk *fs) 1199 { 1200 int64_t s, idle, weight, w0; 1201 int t, i; 1202 1203 fs->w_q = fs->fs.w_q; 1204 fs->max_p = fs->fs.max_p; 1205 ND("called"); 1206 /* Doing stuff that was in userland */ 1207 i = fs->sched->link.bandwidth; 1208 s = (i <= 0) ? 0 : 1209 hz * V_dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i; 1210 1211 idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */ 1212 fs->lookup_step = div64(idle , V_dn_cfg.red_lookup_depth); 1213 /* fs->lookup_step not scaled, */ 1214 if (!fs->lookup_step) 1215 fs->lookup_step = 1; 1216 w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled 1217 1218 for (t = fs->lookup_step; t > 1; --t) 1219 weight = SCALE_MUL(weight, w0); 1220 fs->lookup_weight = (int)(weight); // scaled 1221 1222 /* Now doing stuff that was in kerneland */ 1223 fs->min_th = SCALE(fs->fs.min_th); 1224 fs->max_th = SCALE(fs->fs.max_th); 1225 1226 if (fs->fs.max_th == fs->fs.min_th) 1227 fs->c_1 = fs->max_p; 1228 else 1229 fs->c_1 = SCALE((int64_t)(fs->max_p)) / (fs->fs.max_th - fs->fs.min_th); 1230 fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th)); 1231 1232 if (fs->fs.flags & DN_IS_GENTLE_RED) { 1233 fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th; 1234 fs->c_4 = SCALE(1) - 2 * fs->max_p; 1235 } 1236 1237 /* If the lookup table already exist, free and create it again. */ 1238 free(fs->w_q_lookup, M_DUMMYNET); 1239 fs->w_q_lookup = NULL; 1240 if (V_dn_cfg.red_lookup_depth == 0) { 1241 printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth" 1242 "must be > 0\n"); 1243 fs->fs.flags &= ~DN_IS_RED; 1244 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1245 return (EINVAL); 1246 } 1247 fs->lookup_depth = V_dn_cfg.red_lookup_depth; 1248 fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int), 1249 M_DUMMYNET, M_NOWAIT); 1250 if (fs->w_q_lookup == NULL) { 1251 printf("dummynet: sorry, cannot allocate red lookup table\n"); 1252 fs->fs.flags &= ~DN_IS_RED; 1253 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1254 return(ENOSPC); 1255 } 1256 1257 /* Fill the lookup table with (1 - w_q)^x */ 1258 fs->w_q_lookup[0] = SCALE(1) - fs->w_q; 1259 1260 for (i = 1; i < fs->lookup_depth; i++) 1261 fs->w_q_lookup[i] = 1262 SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight); 1263 1264 if (V_dn_cfg.red_avg_pkt_size < 1) 1265 V_dn_cfg.red_avg_pkt_size = 512; 1266 fs->avg_pkt_size = V_dn_cfg.red_avg_pkt_size; 1267 if (V_dn_cfg.red_max_pkt_size < 1) 1268 V_dn_cfg.red_max_pkt_size = 1500; 1269 fs->max_pkt_size = V_dn_cfg.red_max_pkt_size; 1270 ND("exit"); 1271 return 0; 1272 } 1273 1274 /* Scan all flowset attached to this scheduler and update red */ 1275 static void 1276 update_red(struct dn_schk *s) 1277 { 1278 struct dn_fsk *fs; 1279 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 1280 if (fs && (fs->fs.flags & DN_IS_RED)) 1281 config_red(fs); 1282 } 1283 } 1284 1285 /* attach flowset to scheduler s, possibly requeue */ 1286 static void 1287 fsk_attach(struct dn_fsk *fs, struct dn_schk *s) 1288 { 1289 ND("remove fs %d from fsunlinked, link to sched %d", 1290 fs->fs.fs_nr, s->sch.sched_nr); 1291 SLIST_REMOVE(&V_dn_cfg.fsu, fs, dn_fsk, sch_chain); 1292 fs->sched = s; 1293 SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain); 1294 if (s->fp->new_fsk) 1295 s->fp->new_fsk(fs); 1296 /* XXX compute fsk_mask */ 1297 fs->fsk_mask = fs->fs.flow_mask; 1298 if (fs->sched->sch.flags & DN_HAVE_MASK) 1299 flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask); 1300 if (fs->qht) { 1301 /* 1302 * we must drain qht according to the old 1303 * type, and reinsert according to the new one. 1304 * The requeue is complex -- in general we need to 1305 * reclassify every single packet. 1306 * For the time being, let's hope qht is never set 1307 * when we reach this point. 1308 */ 1309 D("XXX TODO requeue from fs %d to sch %d", 1310 fs->fs.fs_nr, s->sch.sched_nr); 1311 fs->qht = NULL; 1312 } 1313 /* set the new type for qht */ 1314 if (nonzero_mask(&fs->fsk_mask)) 1315 fs->fs.flags |= DN_QHT_HASH; 1316 else 1317 fs->fs.flags &= ~DN_QHT_HASH; 1318 1319 /* XXX config_red() can fail... */ 1320 if (fs->fs.flags & DN_IS_RED) 1321 config_red(fs); 1322 } 1323 1324 /* update all flowsets which may refer to this scheduler */ 1325 static void 1326 update_fs(struct dn_schk *s) 1327 { 1328 struct dn_fsk *fs, *tmp; 1329 1330 SLIST_FOREACH_SAFE(fs, &V_dn_cfg.fsu, sch_chain, tmp) { 1331 if (s->sch.sched_nr != fs->fs.sched_nr) { 1332 D("fs %d for sch %d not %d still unlinked", 1333 fs->fs.fs_nr, fs->fs.sched_nr, 1334 s->sch.sched_nr); 1335 continue; 1336 } 1337 fsk_attach(fs, s); 1338 } 1339 } 1340 1341 #ifdef NEW_AQM 1342 /* Retrieve AQM configurations to ipfw userland 1343 */ 1344 static int 1345 get_aqm_parms(struct sockopt *sopt) 1346 { 1347 struct dn_extra_parms *ep; 1348 struct dn_fsk *fs; 1349 size_t sopt_valsize; 1350 int l, err = 0; 1351 1352 sopt_valsize = sopt->sopt_valsize; 1353 l = sizeof(*ep); 1354 if (sopt->sopt_valsize < l) { 1355 D("bad len sopt->sopt_valsize %d len %d", 1356 (int) sopt->sopt_valsize , l); 1357 err = EINVAL; 1358 return err; 1359 } 1360 ep = malloc(l, M_DUMMYNET, M_NOWAIT); 1361 if(!ep) { 1362 err = ENOMEM ; 1363 return err; 1364 } 1365 do { 1366 err = sooptcopyin(sopt, ep, l, l); 1367 if(err) 1368 break; 1369 sopt->sopt_valsize = sopt_valsize; 1370 if (ep->oid.len < l) { 1371 err = EINVAL; 1372 break; 1373 } 1374 1375 fs = dn_ht_find(V_dn_cfg.fshash, ep->nr, 0, NULL); 1376 if (!fs) { 1377 D("fs %d not found", ep->nr); 1378 err = EINVAL; 1379 break; 1380 } 1381 1382 if (fs->aqmfp && fs->aqmfp->getconfig) { 1383 if(fs->aqmfp->getconfig(fs, ep)) { 1384 D("Error while trying to get AQM params"); 1385 err = EINVAL; 1386 break; 1387 } 1388 ep->oid.len = l; 1389 err = sooptcopyout(sopt, ep, l); 1390 } 1391 }while(0); 1392 1393 free(ep, M_DUMMYNET); 1394 return err; 1395 } 1396 1397 /* Retrieve AQM configurations to ipfw userland 1398 */ 1399 static int 1400 get_sched_parms(struct sockopt *sopt) 1401 { 1402 struct dn_extra_parms *ep; 1403 struct dn_schk *schk; 1404 size_t sopt_valsize; 1405 int l, err = 0; 1406 1407 sopt_valsize = sopt->sopt_valsize; 1408 l = sizeof(*ep); 1409 if (sopt->sopt_valsize < l) { 1410 D("bad len sopt->sopt_valsize %d len %d", 1411 (int) sopt->sopt_valsize , l); 1412 err = EINVAL; 1413 return err; 1414 } 1415 ep = malloc(l, M_DUMMYNET, M_NOWAIT); 1416 if(!ep) { 1417 err = ENOMEM ; 1418 return err; 1419 } 1420 do { 1421 err = sooptcopyin(sopt, ep, l, l); 1422 if(err) 1423 break; 1424 sopt->sopt_valsize = sopt_valsize; 1425 if (ep->oid.len < l) { 1426 err = EINVAL; 1427 break; 1428 } 1429 1430 schk = locate_scheduler(ep->nr); 1431 if (!schk) { 1432 D("sched %d not found", ep->nr); 1433 err = EINVAL; 1434 break; 1435 } 1436 1437 if (schk->fp && schk->fp->getconfig) { 1438 if(schk->fp->getconfig(schk, ep)) { 1439 D("Error while trying to get sched params"); 1440 err = EINVAL; 1441 break; 1442 } 1443 ep->oid.len = l; 1444 err = sooptcopyout(sopt, ep, l); 1445 } 1446 }while(0); 1447 free(ep, M_DUMMYNET); 1448 1449 return err; 1450 } 1451 1452 /* Configure AQM for flowset 'fs'. 1453 * extra parameters are passed from userland. 1454 */ 1455 static int 1456 config_aqm(struct dn_fsk *fs, struct dn_extra_parms *ep, int busy) 1457 { 1458 int err = 0; 1459 1460 NET_EPOCH_ASSERT(); 1461 1462 do { 1463 /* no configurations */ 1464 if (!ep) { 1465 err = 0; 1466 break; 1467 } 1468 1469 /* no AQM for this flowset*/ 1470 if (!strcmp(ep->name,"")) { 1471 err = 0; 1472 break; 1473 } 1474 if (ep->oid.len < sizeof(*ep)) { 1475 D("short aqm len %d", ep->oid.len); 1476 err = EINVAL; 1477 break; 1478 } 1479 1480 if (busy) { 1481 D("Unable to configure flowset, flowset busy!"); 1482 err = EINVAL; 1483 break; 1484 } 1485 1486 /* deconfigure old aqm if exist */ 1487 if (fs->aqmcfg && fs->aqmfp && fs->aqmfp->deconfig) { 1488 aqm_cleanup_deconfig_fs(fs); 1489 } 1490 1491 if (!(fs->aqmfp = find_aqm_type(0, ep->name))) { 1492 D("AQM functions not found for type %s!", ep->name); 1493 fs->fs.flags &= ~DN_IS_AQM; 1494 err = EINVAL; 1495 break; 1496 } else 1497 fs->fs.flags |= DN_IS_AQM; 1498 1499 if (ep->oid.subtype != DN_AQM_PARAMS) { 1500 D("Wrong subtype"); 1501 err = EINVAL; 1502 break; 1503 } 1504 1505 if (fs->aqmfp->config) { 1506 err = fs->aqmfp->config(fs, ep, ep->oid.len); 1507 if (err) { 1508 D("Unable to configure AQM for FS %d", fs->fs.fs_nr ); 1509 fs->fs.flags &= ~DN_IS_AQM; 1510 fs->aqmfp = NULL; 1511 break; 1512 } 1513 } 1514 } while(0); 1515 1516 return err; 1517 } 1518 #endif 1519 1520 /* 1521 * Configuration -- to preserve backward compatibility we use 1522 * the following scheme (N is 65536) 1523 * NUMBER SCHED LINK FLOWSET 1524 * 1 .. N-1 (1)WFQ (2)WFQ (3)queue 1525 * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1 1526 * 2N+1 .. 3N-1 -- -- (7)FIFO for sched N+1..2N-1 1527 * 1528 * "pipe i config" configures #1, #2 and #3 1529 * "sched i config" configures #1 and possibly #6 1530 * "queue i config" configures #3 1531 * #1 is configured with 'pipe i config' or 'sched i config' 1532 * #2 is configured with 'pipe i config', and created if not 1533 * existing with 'sched i config' 1534 * #3 is configured with 'queue i config' 1535 * #4 is automatically configured after #1, can only be FIFO 1536 * #5 is automatically configured after #2 1537 * #6 is automatically created when #1 is !MULTIQUEUE, 1538 * and can be updated. 1539 * #7 is automatically configured after #2 1540 */ 1541 1542 /* 1543 * configure a link (and its FIFO instance) 1544 */ 1545 static int 1546 config_link(struct dn_link *p, struct dn_id *arg) 1547 { 1548 int i; 1549 1550 if (p->oid.len != sizeof(*p)) { 1551 D("invalid pipe len %d", p->oid.len); 1552 return EINVAL; 1553 } 1554 i = p->link_nr; 1555 if (i <= 0 || i >= DN_MAX_ID) 1556 return EINVAL; 1557 /* 1558 * The config program passes parameters as follows: 1559 * bw = bits/second (0 means no limits), 1560 * delay = ms, must be translated into ticks. 1561 * qsize = slots/bytes 1562 * burst ??? 1563 */ 1564 p->delay = (p->delay * hz) / 1000; 1565 /* Scale burst size: bytes -> bits * hz */ 1566 p->burst *= 8 * hz; 1567 1568 DN_BH_WLOCK(); 1569 /* do it twice, base link and FIFO link */ 1570 for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { 1571 struct dn_schk *s = locate_scheduler(i); 1572 if (s == NULL) { 1573 DN_BH_WUNLOCK(); 1574 D("sched %d not found", i); 1575 return EINVAL; 1576 } 1577 /* remove profile if exists */ 1578 free(s->profile, M_DUMMYNET); 1579 s->profile = NULL; 1580 1581 /* copy all parameters */ 1582 s->link.oid = p->oid; 1583 s->link.link_nr = i; 1584 s->link.delay = p->delay; 1585 if (s->link.bandwidth != p->bandwidth) { 1586 /* XXX bandwidth changes, need to update red params */ 1587 s->link.bandwidth = p->bandwidth; 1588 update_red(s); 1589 } 1590 s->link.burst = p->burst; 1591 schk_reset_credit(s); 1592 } 1593 V_dn_cfg.id++; 1594 DN_BH_WUNLOCK(); 1595 return 0; 1596 } 1597 1598 /* 1599 * configure a flowset. Can be called from inside with locked=1, 1600 */ 1601 static struct dn_fsk * 1602 config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked) 1603 { 1604 int i; 1605 struct dn_fsk *fs; 1606 #ifdef NEW_AQM 1607 struct dn_extra_parms *ep; 1608 #endif 1609 1610 if (nfs->oid.len != sizeof(*nfs)) { 1611 D("invalid flowset len %d", nfs->oid.len); 1612 return NULL; 1613 } 1614 i = nfs->fs_nr; 1615 if (i <= 0 || i >= 3*DN_MAX_ID) 1616 return NULL; 1617 #ifdef NEW_AQM 1618 ep = NULL; 1619 if (arg != NULL) { 1620 ep = malloc(sizeof(*ep), M_TEMP, M_NOWAIT); 1621 if (ep == NULL) 1622 return (NULL); 1623 memcpy(ep, arg, sizeof(*ep)); 1624 } 1625 #endif 1626 ND("flowset %d", i); 1627 /* XXX other sanity checks */ 1628 if (nfs->flags & DN_QSIZE_BYTES) { 1629 ipdn_bound_var(&nfs->qsize, 16384, 1630 1500, V_dn_cfg.byte_limit, NULL); // "queue byte size"); 1631 } else { 1632 ipdn_bound_var(&nfs->qsize, 50, 1633 1, V_dn_cfg.slot_limit, NULL); // "queue slot size"); 1634 } 1635 if (nfs->flags & DN_HAVE_MASK) { 1636 /* make sure we have some buckets */ 1637 ipdn_bound_var((int *)&nfs->buckets, V_dn_cfg.hash_size, 1638 1, V_dn_cfg.max_hash_size, "flowset buckets"); 1639 } else { 1640 nfs->buckets = 1; /* we only need 1 */ 1641 } 1642 if (!locked) 1643 DN_BH_WLOCK(); 1644 do { /* exit with break when done */ 1645 struct dn_schk *s; 1646 int flags = nfs->sched_nr ? DNHT_INSERT : 0; 1647 int j; 1648 int oldc = V_dn_cfg.fsk_count; 1649 fs = dn_ht_find(V_dn_cfg.fshash, i, flags, NULL); 1650 if (fs == NULL) { 1651 D("missing sched for flowset %d", i); 1652 break; 1653 } 1654 /* grab some defaults from the existing one */ 1655 if (nfs->sched_nr == 0) /* reuse */ 1656 nfs->sched_nr = fs->fs.sched_nr; 1657 for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) { 1658 if (nfs->par[j] == -1) /* reuse */ 1659 nfs->par[j] = fs->fs.par[j]; 1660 } 1661 if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) { 1662 ND("flowset %d unchanged", i); 1663 #ifdef NEW_AQM 1664 if (ep != NULL) { 1665 /* 1666 * Reconfigure AQM as the parameters can be changed. 1667 * We consider the flowset as busy if it has scheduler 1668 * instance(s). 1669 */ 1670 s = locate_scheduler(nfs->sched_nr); 1671 config_aqm(fs, ep, s != NULL && s->siht != NULL); 1672 } 1673 #endif 1674 break; /* no change, nothing to do */ 1675 } 1676 if (oldc != V_dn_cfg.fsk_count) /* new item */ 1677 V_dn_cfg.id++; 1678 s = locate_scheduler(nfs->sched_nr); 1679 /* detach from old scheduler if needed, preserving 1680 * queues if we need to reattach. Then update the 1681 * configuration, and possibly attach to the new sched. 1682 */ 1683 DX(2, "fs %d changed sched %d@%p to %d@%p", 1684 fs->fs.fs_nr, 1685 fs->fs.sched_nr, fs->sched, nfs->sched_nr, s); 1686 if (fs->sched) { 1687 int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY); 1688 flags |= DN_DESTROY; /* XXX temporary */ 1689 fsk_detach(fs, flags); 1690 } 1691 fs->fs = *nfs; /* copy configuration */ 1692 #ifdef NEW_AQM 1693 fs->aqmfp = NULL; 1694 if (ep != NULL) 1695 config_aqm(fs, ep, s != NULL && 1696 s->siht != NULL); 1697 #endif 1698 if (s != NULL) 1699 fsk_attach(fs, s); 1700 } while (0); 1701 if (!locked) 1702 DN_BH_WUNLOCK(); 1703 #ifdef NEW_AQM 1704 free(ep, M_TEMP); 1705 #endif 1706 return fs; 1707 } 1708 1709 /* 1710 * config/reconfig a scheduler and its FIFO variant. 1711 * For !MULTIQUEUE schedulers, also set up the flowset. 1712 * 1713 * On reconfigurations (detected because s->fp is set), 1714 * detach existing flowsets preserving traffic, preserve link, 1715 * and delete the old scheduler creating a new one. 1716 */ 1717 static int 1718 config_sched(struct dn_sch *_nsch, struct dn_id *arg) 1719 { 1720 struct dn_schk *s; 1721 struct schk_new_arg a; /* argument for schk_new */ 1722 int i; 1723 struct dn_link p; /* copy of oldlink */ 1724 struct dn_profile *pf = NULL; /* copy of old link profile */ 1725 /* Used to preserv mask parameter */ 1726 struct ipfw_flow_id new_mask; 1727 int new_buckets = 0; 1728 int new_flags = 0; 1729 int pipe_cmd; 1730 int err = ENOMEM; 1731 1732 NET_EPOCH_ASSERT(); 1733 1734 a.sch = _nsch; 1735 if (a.sch->oid.len != sizeof(*a.sch)) { 1736 D("bad sched len %d", a.sch->oid.len); 1737 return EINVAL; 1738 } 1739 i = a.sch->sched_nr; 1740 if (i <= 0 || i >= DN_MAX_ID) 1741 return EINVAL; 1742 /* make sure we have some buckets */ 1743 if (a.sch->flags & DN_HAVE_MASK) 1744 ipdn_bound_var((int *)&a.sch->buckets, V_dn_cfg.hash_size, 1745 1, V_dn_cfg.max_hash_size, "sched buckets"); 1746 /* XXX other sanity checks */ 1747 bzero(&p, sizeof(p)); 1748 1749 pipe_cmd = a.sch->flags & DN_PIPE_CMD; 1750 a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set? 1751 if (pipe_cmd) { 1752 /* Copy mask parameter */ 1753 new_mask = a.sch->sched_mask; 1754 new_buckets = a.sch->buckets; 1755 new_flags = a.sch->flags; 1756 } 1757 DN_BH_WLOCK(); 1758 again: /* run twice, for wfq and fifo */ 1759 /* 1760 * lookup the type. If not supplied, use the previous one 1761 * or default to WF2Q+. Otherwise, return an error. 1762 */ 1763 V_dn_cfg.id++; 1764 a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name); 1765 if (a.fp != NULL) { 1766 /* found. Lookup or create entry */ 1767 s = dn_ht_find(V_dn_cfg.schedhash, i, DNHT_INSERT, &a); 1768 } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) { 1769 /* No type. search existing s* or retry with WF2Q+ */ 1770 s = dn_ht_find(V_dn_cfg.schedhash, i, 0, &a); 1771 if (s != NULL) { 1772 a.fp = s->fp; 1773 /* Scheduler exists, skip to FIFO scheduler 1774 * if command was pipe config... 1775 */ 1776 if (pipe_cmd) 1777 goto next; 1778 } else { 1779 /* New scheduler, create a wf2q+ with no mask 1780 * if command was pipe config... 1781 */ 1782 if (pipe_cmd) { 1783 /* clear mask parameter */ 1784 bzero(&a.sch->sched_mask, sizeof(new_mask)); 1785 a.sch->buckets = 0; 1786 a.sch->flags &= ~DN_HAVE_MASK; 1787 } 1788 a.sch->oid.subtype = DN_SCHED_WF2QP; 1789 goto again; 1790 } 1791 } else { 1792 D("invalid scheduler type %d %s", 1793 a.sch->oid.subtype, a.sch->name); 1794 err = EINVAL; 1795 goto error; 1796 } 1797 /* normalize name and subtype */ 1798 a.sch->oid.subtype = a.fp->type; 1799 bzero(a.sch->name, sizeof(a.sch->name)); 1800 strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name)); 1801 if (s == NULL) { 1802 D("cannot allocate scheduler %d", i); 1803 goto error; 1804 } 1805 /* restore existing link if any */ 1806 if (p.link_nr) { 1807 s->link = p; 1808 if (!pf || pf->link_nr != p.link_nr) { /* no saved value */ 1809 s->profile = NULL; /* XXX maybe not needed */ 1810 } else { 1811 s->profile = malloc(sizeof(struct dn_profile), 1812 M_DUMMYNET, M_NOWAIT | M_ZERO); 1813 if (s->profile == NULL) { 1814 D("cannot allocate profile"); 1815 goto error; //XXX 1816 } 1817 memcpy(s->profile, pf, sizeof(*pf)); 1818 } 1819 } 1820 p.link_nr = 0; 1821 if (s->fp == NULL) { 1822 DX(2, "sched %d new type %s", i, a.fp->name); 1823 } else if (s->fp != a.fp || 1824 bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) { 1825 /* already existing. */ 1826 DX(2, "sched %d type changed from %s to %s", 1827 i, s->fp->name, a.fp->name); 1828 DX(4, " type/sub %d/%d -> %d/%d", 1829 s->sch.oid.type, s->sch.oid.subtype, 1830 a.sch->oid.type, a.sch->oid.subtype); 1831 if (s->link.link_nr == 0) 1832 D("XXX WARNING link 0 for sched %d", i); 1833 p = s->link; /* preserve link */ 1834 if (s->profile) {/* preserve profile */ 1835 if (!pf) 1836 pf = malloc(sizeof(*pf), 1837 M_DUMMYNET, M_NOWAIT | M_ZERO); 1838 if (pf) /* XXX should issue a warning otherwise */ 1839 memcpy(pf, s->profile, sizeof(*pf)); 1840 } 1841 /* remove from the hash */ 1842 dn_ht_find(V_dn_cfg.schedhash, i, DNHT_REMOVE, NULL); 1843 /* Detach flowsets, preserve queues. */ 1844 // schk_delete_cb(s, NULL); 1845 // XXX temporarily, kill queues 1846 schk_delete_cb(s, (void *)DN_DESTROY); 1847 goto again; 1848 } else { 1849 DX(4, "sched %d unchanged type %s", i, a.fp->name); 1850 } 1851 /* complete initialization */ 1852 s->sch = *a.sch; 1853 s->fp = a.fp; 1854 s->cfg = arg; 1855 // XXX schk_reset_credit(s); 1856 /* create the internal flowset if needed, 1857 * trying to reuse existing ones if available 1858 */ 1859 if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) { 1860 s->fs = dn_ht_find(V_dn_cfg.fshash, i, 0, NULL); 1861 if (!s->fs) { 1862 struct dn_fs fs; 1863 bzero(&fs, sizeof(fs)); 1864 set_oid(&fs.oid, DN_FS, sizeof(fs)); 1865 fs.fs_nr = i + DN_MAX_ID; 1866 fs.sched_nr = i; 1867 s->fs = config_fs(&fs, NULL, 1 /* locked */); 1868 } 1869 if (!s->fs) { 1870 schk_delete_cb(s, (void *)DN_DESTROY); 1871 D("error creating internal fs for %d", i); 1872 goto error; 1873 } 1874 } 1875 /* call init function after the flowset is created */ 1876 if (s->fp->config) 1877 s->fp->config(s); 1878 update_fs(s); 1879 next: 1880 if (i < DN_MAX_ID) { /* now configure the FIFO instance */ 1881 i += DN_MAX_ID; 1882 if (pipe_cmd) { 1883 /* Restore mask parameter for FIFO */ 1884 a.sch->sched_mask = new_mask; 1885 a.sch->buckets = new_buckets; 1886 a.sch->flags = new_flags; 1887 } else { 1888 /* sched config shouldn't modify the FIFO scheduler */ 1889 if (dn_ht_find(V_dn_cfg.schedhash, i, 0, &a) != NULL) { 1890 /* FIFO already exist, don't touch it */ 1891 err = 0; /* and this is not an error */ 1892 goto error; 1893 } 1894 } 1895 a.sch->sched_nr = i; 1896 a.sch->oid.subtype = DN_SCHED_FIFO; 1897 bzero(a.sch->name, sizeof(a.sch->name)); 1898 goto again; 1899 } 1900 err = 0; 1901 error: 1902 DN_BH_WUNLOCK(); 1903 free(pf, M_DUMMYNET); 1904 return err; 1905 } 1906 1907 /* 1908 * attach a profile to a link 1909 */ 1910 static int 1911 config_profile(struct dn_profile *pf, struct dn_id *arg) 1912 { 1913 struct dn_schk *s; 1914 int i, olen, err = 0; 1915 1916 if (pf->oid.len < sizeof(*pf)) { 1917 D("short profile len %d", pf->oid.len); 1918 return EINVAL; 1919 } 1920 i = pf->link_nr; 1921 if (i <= 0 || i >= DN_MAX_ID) 1922 return EINVAL; 1923 /* XXX other sanity checks */ 1924 DN_BH_WLOCK(); 1925 for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { 1926 s = locate_scheduler(i); 1927 1928 if (s == NULL) { 1929 err = EINVAL; 1930 break; 1931 } 1932 V_dn_cfg.id++; 1933 /* 1934 * If we had a profile and the new one does not fit, 1935 * or it is deleted, then we need to free memory. 1936 */ 1937 if (s->profile && (pf->samples_no == 0 || 1938 s->profile->oid.len < pf->oid.len)) { 1939 free(s->profile, M_DUMMYNET); 1940 s->profile = NULL; 1941 } 1942 if (pf->samples_no == 0) 1943 continue; 1944 /* 1945 * new profile, possibly allocate memory 1946 * and copy data. 1947 */ 1948 if (s->profile == NULL) 1949 s->profile = malloc(pf->oid.len, 1950 M_DUMMYNET, M_NOWAIT | M_ZERO); 1951 if (s->profile == NULL) { 1952 D("no memory for profile %d", i); 1953 err = ENOMEM; 1954 break; 1955 } 1956 /* preserve larger length XXX double check */ 1957 olen = s->profile->oid.len; 1958 if (olen < pf->oid.len) 1959 olen = pf->oid.len; 1960 memcpy(s->profile, pf, pf->oid.len); 1961 s->profile->oid.len = olen; 1962 } 1963 DN_BH_WUNLOCK(); 1964 return err; 1965 } 1966 1967 /* 1968 * Delete all objects: 1969 */ 1970 static void 1971 dummynet_flush(void) 1972 { 1973 1974 /* delete all schedulers and related links/queues/flowsets */ 1975 dn_ht_scan(V_dn_cfg.schedhash, schk_delete_cb, 1976 (void *)(uintptr_t)DN_DELETE_FS); 1977 /* delete all remaining (unlinked) flowsets */ 1978 DX(4, "still %d unlinked fs", V_dn_cfg.fsk_count); 1979 dn_ht_free(V_dn_cfg.fshash, DNHT_REMOVE); 1980 fsk_detach_list(&V_dn_cfg.fsu, DN_DELETE_FS); 1981 /* Reinitialize system heap... */ 1982 heap_init(&V_dn_cfg.evheap, 16, offsetof(struct dn_id, id)); 1983 } 1984 1985 /* 1986 * Main handler for configuration. We are guaranteed to be called 1987 * with an oid which is at least a dn_id. 1988 * - the first object is the command (config, delete, flush, ...) 1989 * - config_link must be issued after the corresponding config_sched 1990 * - parameters (DN_TXT) for an object must precede the object 1991 * processed on a config_sched. 1992 */ 1993 int 1994 do_config(void *p, int l) 1995 { 1996 struct dn_id o; 1997 union { 1998 struct dn_profile profile; 1999 struct dn_fs fs; 2000 struct dn_link link; 2001 struct dn_sch sched; 2002 } *dn; 2003 struct dn_id *arg; 2004 uintptr_t a; 2005 int err, err2, off; 2006 2007 memcpy(&o, p, sizeof(o)); 2008 if (o.id != DN_API_VERSION) { 2009 D("invalid api version got %d need %d", o.id, DN_API_VERSION); 2010 return EINVAL; 2011 } 2012 arg = NULL; 2013 dn = NULL; 2014 for (off = 0; l >= sizeof(o); memcpy(&o, (char *)p + off, sizeof(o))) { 2015 if (o.len < sizeof(o) || l < o.len) { 2016 D("bad len o.len %d len %d", o.len, l); 2017 err = EINVAL; 2018 break; 2019 } 2020 l -= o.len; 2021 err = 0; 2022 switch (o.type) { 2023 default: 2024 D("cmd %d not implemented", o.type); 2025 break; 2026 2027 #ifdef EMULATE_SYSCTL 2028 /* sysctl emulation. 2029 * if we recognize the command, jump to the correct 2030 * handler and return 2031 */ 2032 case DN_SYSCTL_SET: 2033 err = kesysctl_emu_set(p, l); 2034 return err; 2035 #endif 2036 2037 case DN_CMD_CONFIG: /* simply a header */ 2038 break; 2039 2040 case DN_CMD_DELETE: 2041 /* the argument is in the first uintptr_t after o */ 2042 if (o.len < sizeof(o) + sizeof(a)) { 2043 err = EINVAL; 2044 break; 2045 } 2046 memcpy(&a, (char *)p + off + sizeof(o), sizeof(a)); 2047 switch (o.subtype) { 2048 case DN_LINK: 2049 /* delete base and derived schedulers */ 2050 DN_BH_WLOCK(); 2051 err = delete_schk(a); 2052 err2 = delete_schk(a + DN_MAX_ID); 2053 DN_BH_WUNLOCK(); 2054 if (!err) 2055 err = err2; 2056 break; 2057 2058 default: 2059 D("invalid delete type %d", o.subtype); 2060 err = EINVAL; 2061 break; 2062 2063 case DN_FS: 2064 err = (a < 1 || a >= DN_MAX_ID) ? 2065 EINVAL : delete_fs(a, 0) ; 2066 break; 2067 } 2068 break; 2069 2070 case DN_CMD_FLUSH: 2071 DN_BH_WLOCK(); 2072 dummynet_flush(); 2073 DN_BH_WUNLOCK(); 2074 break; 2075 case DN_TEXT: /* store argument of next block */ 2076 free(arg, M_TEMP); 2077 arg = malloc(o.len, M_TEMP, M_NOWAIT); 2078 if (arg == NULL) { 2079 err = ENOMEM; 2080 break; 2081 } 2082 memcpy(arg, (char *)p + off, o.len); 2083 break; 2084 case DN_LINK: 2085 if (dn == NULL) 2086 dn = malloc(sizeof(*dn), M_TEMP, M_NOWAIT); 2087 if (dn == NULL) { 2088 err = ENOMEM; 2089 break; 2090 } 2091 memcpy(&dn->link, (char *)p + off, sizeof(dn->link)); 2092 err = config_link(&dn->link, arg); 2093 break; 2094 case DN_PROFILE: 2095 if (dn == NULL) 2096 dn = malloc(sizeof(*dn), M_TEMP, M_NOWAIT); 2097 if (dn == NULL) { 2098 err = ENOMEM; 2099 break; 2100 } 2101 memcpy(&dn->profile, (char *)p + off, 2102 sizeof(dn->profile)); 2103 err = config_profile(&dn->profile, arg); 2104 break; 2105 case DN_SCH: 2106 if (dn == NULL) 2107 dn = malloc(sizeof(*dn), M_TEMP, M_NOWAIT); 2108 if (dn == NULL) { 2109 err = ENOMEM; 2110 break; 2111 } 2112 memcpy(&dn->sched, (char *)p + off, 2113 sizeof(dn->sched)); 2114 err = config_sched(&dn->sched, arg); 2115 break; 2116 case DN_FS: 2117 if (dn == NULL) 2118 dn = malloc(sizeof(*dn), M_TEMP, M_NOWAIT); 2119 if (dn == NULL) { 2120 err = ENOMEM; 2121 break; 2122 } 2123 memcpy(&dn->fs, (char *)p + off, sizeof(dn->fs)); 2124 err = (NULL == config_fs(&dn->fs, arg, 0)); 2125 break; 2126 } 2127 if (err != 0) 2128 break; 2129 off += o.len; 2130 } 2131 free(arg, M_TEMP); 2132 free(dn, M_TEMP); 2133 return err; 2134 } 2135 2136 static int 2137 compute_space(struct dn_id *cmd, struct copy_args *a) 2138 { 2139 int x = 0, need = 0; 2140 int profile_size = sizeof(struct dn_profile) - 2141 ED_MAX_SAMPLES_NO*sizeof(int); 2142 2143 /* NOTE about compute space: 2144 * NP = V_dn_cfg.schk_count 2145 * NSI = V_dn_cfg.si_count 2146 * NF = V_dn_cfg.fsk_count 2147 * NQ = V_dn_cfg.queue_count 2148 * - ipfw pipe show 2149 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 2150 * link, scheduler template, flowset 2151 * integrated in scheduler and header 2152 * for flowset list 2153 * (NSI)*(dn_flow) all scheduler instance (includes 2154 * the queue instance) 2155 * - ipfw sched show 2156 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 2157 * link, scheduler template, flowset 2158 * integrated in scheduler and header 2159 * for flowset list 2160 * (NSI * dn_flow) all scheduler instances 2161 * (NF * sizeof(uint_32)) space for flowset list linked to scheduler 2162 * (NQ * dn_queue) all queue [XXXfor now not listed] 2163 * - ipfw queue show 2164 * (NF * dn_fs) all flowset 2165 * (NQ * dn_queue) all queues 2166 */ 2167 switch (cmd->subtype) { 2168 default: 2169 return -1; 2170 /* XXX where do LINK and SCH differ ? */ 2171 /* 'ipfw sched show' could list all queues associated to 2172 * a scheduler. This feature for now is disabled 2173 */ 2174 case DN_LINK: /* pipe show */ 2175 x = DN_C_LINK | DN_C_SCH | DN_C_FLOW; 2176 need += V_dn_cfg.schk_count * 2177 (sizeof(struct dn_fs) + profile_size) / 2; 2178 need += V_dn_cfg.fsk_count * sizeof(uint32_t); 2179 break; 2180 case DN_SCH: /* sched show */ 2181 need += V_dn_cfg.schk_count * 2182 (sizeof(struct dn_fs) + profile_size) / 2; 2183 need += V_dn_cfg.fsk_count * sizeof(uint32_t); 2184 x = DN_C_SCH | DN_C_LINK | DN_C_FLOW; 2185 break; 2186 case DN_FS: /* queue show */ 2187 x = DN_C_FS | DN_C_QUEUE; 2188 break; 2189 case DN_GET_COMPAT: /* compatibility mode */ 2190 need = dn_compat_calc_size(); 2191 break; 2192 } 2193 a->flags = x; 2194 if (x & DN_C_SCH) { 2195 need += V_dn_cfg.schk_count * sizeof(struct dn_sch) / 2; 2196 /* NOT also, each fs might be attached to a sched */ 2197 need += V_dn_cfg.schk_count * sizeof(struct dn_id) / 2; 2198 } 2199 if (x & DN_C_FS) 2200 need += V_dn_cfg.fsk_count * sizeof(struct dn_fs); 2201 if (x & DN_C_LINK) { 2202 need += V_dn_cfg.schk_count * sizeof(struct dn_link) / 2; 2203 } 2204 /* 2205 * When exporting a queue to userland, only pass up the 2206 * struct dn_flow, which is the only visible part. 2207 */ 2208 2209 if (x & DN_C_QUEUE) 2210 need += V_dn_cfg.queue_count * sizeof(struct dn_flow); 2211 if (x & DN_C_FLOW) 2212 need += V_dn_cfg.si_count * (sizeof(struct dn_flow)); 2213 return need; 2214 } 2215 2216 /* 2217 * If compat != NULL dummynet_get is called in compatibility mode. 2218 * *compat will be the pointer to the buffer to pass to ipfw 2219 */ 2220 int 2221 dummynet_get(struct sockopt *sopt, void **compat) 2222 { 2223 int have, i, need, error; 2224 char *start = NULL, *buf; 2225 size_t sopt_valsize; 2226 struct dn_id *cmd; 2227 struct copy_args a; 2228 struct copy_range r; 2229 int l = sizeof(struct dn_id); 2230 2231 bzero(&a, sizeof(a)); 2232 bzero(&r, sizeof(r)); 2233 2234 /* save and restore original sopt_valsize around copyin */ 2235 sopt_valsize = sopt->sopt_valsize; 2236 2237 cmd = &r.o; 2238 2239 if (!compat) { 2240 /* copy at least an oid, and possibly a full object */ 2241 error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); 2242 sopt->sopt_valsize = sopt_valsize; 2243 if (error) 2244 goto done; 2245 l = cmd->len; 2246 #ifdef EMULATE_SYSCTL 2247 /* sysctl emulation. */ 2248 if (cmd->type == DN_SYSCTL_GET) 2249 return kesysctl_emu_get(sopt); 2250 #endif 2251 if (l > sizeof(r)) { 2252 /* request larger than default, allocate buffer */ 2253 cmd = malloc(l, M_DUMMYNET, M_NOWAIT); 2254 if (cmd == NULL) { 2255 error = ENOMEM; 2256 goto done; 2257 } 2258 error = sooptcopyin(sopt, cmd, l, l); 2259 sopt->sopt_valsize = sopt_valsize; 2260 if (error) 2261 goto done; 2262 } 2263 } else { /* compatibility */ 2264 error = 0; 2265 cmd->type = DN_CMD_GET; 2266 cmd->len = sizeof(struct dn_id); 2267 cmd->subtype = DN_GET_COMPAT; 2268 // cmd->id = sopt_valsize; 2269 D("compatibility mode"); 2270 } 2271 2272 #ifdef NEW_AQM 2273 /* get AQM params */ 2274 if(cmd->subtype == DN_AQM_PARAMS) { 2275 error = get_aqm_parms(sopt); 2276 goto done; 2277 /* get Scheduler params */ 2278 } else if (cmd->subtype == DN_SCH_PARAMS) { 2279 error = get_sched_parms(sopt); 2280 goto done; 2281 } 2282 #endif 2283 2284 a.extra = (struct copy_range *)cmd; 2285 if (cmd->len == sizeof(*cmd)) { /* no range, create a default */ 2286 uint32_t *rp = (uint32_t *)(cmd + 1); 2287 cmd->len += 2* sizeof(uint32_t); 2288 rp[0] = 1; 2289 rp[1] = DN_MAX_ID - 1; 2290 if (cmd->subtype == DN_LINK) { 2291 rp[0] += DN_MAX_ID; 2292 rp[1] += DN_MAX_ID; 2293 } 2294 } 2295 /* Count space (under lock) and allocate (outside lock). 2296 * Exit with lock held if we manage to get enough buffer. 2297 * Try a few times then give up. 2298 */ 2299 for (have = 0, i = 0; i < 10; i++) { 2300 DN_BH_WLOCK(); 2301 need = compute_space(cmd, &a); 2302 2303 /* if there is a range, ignore value from compute_space() */ 2304 if (l > sizeof(*cmd)) 2305 need = sopt_valsize - sizeof(*cmd); 2306 2307 if (need < 0) { 2308 DN_BH_WUNLOCK(); 2309 error = EINVAL; 2310 goto done; 2311 } 2312 need += sizeof(*cmd); 2313 cmd->id = need; 2314 if (have >= need) 2315 break; 2316 2317 DN_BH_WUNLOCK(); 2318 free(start, M_DUMMYNET); 2319 start = NULL; 2320 if (need > sopt_valsize) 2321 break; 2322 2323 have = need; 2324 start = malloc(have, M_DUMMYNET, M_NOWAIT | M_ZERO); 2325 } 2326 2327 if (start == NULL) { 2328 if (compat) { 2329 *compat = NULL; 2330 error = 1; // XXX 2331 } else { 2332 error = sooptcopyout(sopt, cmd, sizeof(*cmd)); 2333 } 2334 goto done; 2335 } 2336 ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, " 2337 "%d:%d si %d, %d:%d queues %d", 2338 V_dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH, 2339 V_dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK, 2340 V_dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS, 2341 V_dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I, 2342 V_dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE); 2343 sopt->sopt_valsize = sopt_valsize; 2344 a.type = cmd->subtype; 2345 2346 if (compat == NULL) { 2347 memcpy(start, cmd, sizeof(*cmd)); 2348 ((struct dn_id*)(start))->len = sizeof(struct dn_id); 2349 buf = start + sizeof(*cmd); 2350 } else 2351 buf = start; 2352 a.start = &buf; 2353 a.end = start + have; 2354 /* start copying other objects */ 2355 if (compat) { 2356 a.type = DN_COMPAT_PIPE; 2357 dn_ht_scan(V_dn_cfg.schedhash, copy_data_helper_compat, &a); 2358 a.type = DN_COMPAT_QUEUE; 2359 dn_ht_scan(V_dn_cfg.fshash, copy_data_helper_compat, &a); 2360 } else if (a.type == DN_FS) { 2361 dn_ht_scan(V_dn_cfg.fshash, copy_data_helper, &a); 2362 } else { 2363 dn_ht_scan(V_dn_cfg.schedhash, copy_data_helper, &a); 2364 } 2365 DN_BH_WUNLOCK(); 2366 2367 if (compat) { 2368 *compat = start; 2369 sopt->sopt_valsize = buf - start; 2370 /* free() is done by ip_dummynet_compat() */ 2371 start = NULL; //XXX hack 2372 } else { 2373 error = sooptcopyout(sopt, start, buf - start); 2374 } 2375 done: 2376 if (cmd != &r.o) 2377 free(cmd, M_DUMMYNET); 2378 free(start, M_DUMMYNET); 2379 return error; 2380 } 2381 2382 /* Callback called on scheduler instance to delete it if idle */ 2383 static int 2384 drain_scheduler_cb(void *_si, void *arg) 2385 { 2386 struct dn_sch_inst *si = _si; 2387 2388 if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL) 2389 return 0; 2390 2391 if (si->sched->fp->flags & DN_MULTIQUEUE) { 2392 if (si->q_count == 0) 2393 return si_destroy(si, NULL); 2394 else 2395 return 0; 2396 } else { /* !DN_MULTIQUEUE */ 2397 if ((si+1)->ni.length == 0) 2398 return si_destroy(si, NULL); 2399 else 2400 return 0; 2401 } 2402 return 0; /* unreachable */ 2403 } 2404 2405 /* Callback called on scheduler to check if it has instances */ 2406 static int 2407 drain_scheduler_sch_cb(void *_s, void *arg) 2408 { 2409 struct dn_schk *s = _s; 2410 2411 if (s->sch.flags & DN_HAVE_MASK) { 2412 dn_ht_scan_bucket(s->siht, &s->drain_bucket, 2413 drain_scheduler_cb, NULL); 2414 s->drain_bucket++; 2415 } else { 2416 if (s->siht) { 2417 if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL) 2418 s->siht = NULL; 2419 } 2420 } 2421 return 0; 2422 } 2423 2424 /* Called every tick, try to delete a 'bucket' of scheduler */ 2425 void 2426 dn_drain_scheduler(void) 2427 { 2428 dn_ht_scan_bucket(V_dn_cfg.schedhash, &V_dn_cfg.drain_sch, 2429 drain_scheduler_sch_cb, NULL); 2430 V_dn_cfg.drain_sch++; 2431 } 2432 2433 /* Callback called on queue to delete if it is idle */ 2434 static int 2435 drain_queue_cb(void *_q, void *arg) 2436 { 2437 struct dn_queue *q = _q; 2438 2439 if (q->ni.length == 0) { 2440 dn_delete_queue(q, DN_DESTROY); 2441 return DNHT_SCAN_DEL; /* queue is deleted */ 2442 } 2443 2444 return 0; /* queue isn't deleted */ 2445 } 2446 2447 /* Callback called on flowset used to check if it has queues */ 2448 static int 2449 drain_queue_fs_cb(void *_fs, void *arg) 2450 { 2451 struct dn_fsk *fs = _fs; 2452 2453 if (fs->fs.flags & DN_QHT_HASH) { 2454 /* Flowset has a hash table for queues */ 2455 dn_ht_scan_bucket(fs->qht, &fs->drain_bucket, 2456 drain_queue_cb, NULL); 2457 fs->drain_bucket++; 2458 } else { 2459 /* No hash table for this flowset, null the pointer 2460 * if the queue is deleted 2461 */ 2462 if (fs->qht) { 2463 if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL) 2464 fs->qht = NULL; 2465 } 2466 } 2467 return 0; 2468 } 2469 2470 /* Called every tick, try to delete a 'bucket' of queue */ 2471 void 2472 dn_drain_queue(void) 2473 { 2474 /* scan a bucket of flowset */ 2475 dn_ht_scan_bucket(V_dn_cfg.fshash, &V_dn_cfg.drain_fs, 2476 drain_queue_fs_cb, NULL); 2477 V_dn_cfg.drain_fs++; 2478 } 2479 2480 /* 2481 * Handler for the various dummynet socket options 2482 */ 2483 static int 2484 ip_dn_ctl(struct sockopt *sopt) 2485 { 2486 struct epoch_tracker et; 2487 void *p = NULL; 2488 int error, l; 2489 2490 error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET); 2491 if (error) 2492 return (error); 2493 2494 /* Disallow sets in really-really secure mode. */ 2495 if (sopt->sopt_dir == SOPT_SET) { 2496 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); 2497 if (error) 2498 return (error); 2499 } 2500 2501 NET_EPOCH_ENTER(et); 2502 2503 switch (sopt->sopt_name) { 2504 default : 2505 D("dummynet: unknown option %d", sopt->sopt_name); 2506 error = EINVAL; 2507 break; 2508 2509 case IP_DUMMYNET_FLUSH: 2510 case IP_DUMMYNET_CONFIGURE: 2511 case IP_DUMMYNET_DEL: /* remove a pipe or queue */ 2512 case IP_DUMMYNET_GET: 2513 D("dummynet: compat option %d", sopt->sopt_name); 2514 error = ip_dummynet_compat(sopt); 2515 break; 2516 2517 case IP_DUMMYNET3 : 2518 if (sopt->sopt_dir == SOPT_GET) { 2519 error = dummynet_get(sopt, NULL); 2520 break; 2521 } 2522 l = sopt->sopt_valsize; 2523 if (l < sizeof(struct dn_id) || l > 12000) { 2524 D("argument len %d invalid", l); 2525 break; 2526 } 2527 p = malloc(l, M_TEMP, M_NOWAIT); 2528 if (p == NULL) { 2529 error = ENOMEM; 2530 break; 2531 } 2532 error = sooptcopyin(sopt, p, l, l); 2533 if (error) 2534 break ; 2535 error = do_config(p, l); 2536 break; 2537 } 2538 2539 free(p, M_TEMP); 2540 2541 NET_EPOCH_EXIT(et); 2542 2543 return error ; 2544 } 2545 2546 static void 2547 ip_dn_vnet_init(void) 2548 { 2549 if (V_dn_cfg.init_done) 2550 return; 2551 V_dn_cfg.init_done = 1; 2552 /* Set defaults here. MSVC does not accept initializers, 2553 * and this is also useful for vimages 2554 */ 2555 /* queue limits */ 2556 V_dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */ 2557 V_dn_cfg.byte_limit = 1024 * 1024; 2558 V_dn_cfg.expire = 1; 2559 2560 /* RED parameters */ 2561 V_dn_cfg.red_lookup_depth = 256; /* default lookup table depth */ 2562 V_dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */ 2563 V_dn_cfg.red_max_pkt_size = 1500; /* default max packet size */ 2564 2565 /* hash tables */ 2566 V_dn_cfg.max_hash_size = 65536; /* max in the hash tables */ 2567 V_dn_cfg.hash_size = 64; /* default hash size */ 2568 2569 /* create hash tables for schedulers and flowsets. 2570 * In both we search by key and by pointer. 2571 */ 2572 V_dn_cfg.schedhash = dn_ht_init(NULL, V_dn_cfg.hash_size, 2573 offsetof(struct dn_schk, schk_next), 2574 schk_hash, schk_match, schk_new); 2575 V_dn_cfg.fshash = dn_ht_init(NULL, V_dn_cfg.hash_size, 2576 offsetof(struct dn_fsk, fsk_next), 2577 fsk_hash, fsk_match, fsk_new); 2578 2579 /* bucket index to drain object */ 2580 V_dn_cfg.drain_fs = 0; 2581 V_dn_cfg.drain_sch = 0; 2582 2583 heap_init(&V_dn_cfg.evheap, 16, offsetof(struct dn_id, id)); 2584 SLIST_INIT(&V_dn_cfg.fsu); 2585 2586 DN_LOCK_INIT(); 2587 2588 /* Initialize curr_time adjustment mechanics. */ 2589 getmicrouptime(&V_dn_cfg.prev_t); 2590 } 2591 2592 static void 2593 ip_dn_vnet_destroy(void) 2594 { 2595 DN_BH_WLOCK(); 2596 dummynet_flush(); 2597 DN_BH_WUNLOCK(); 2598 2599 dn_ht_free(V_dn_cfg.schedhash, 0); 2600 dn_ht_free(V_dn_cfg.fshash, 0); 2601 heap_free(&V_dn_cfg.evheap); 2602 2603 DN_LOCK_DESTROY(); 2604 } 2605 2606 static void 2607 ip_dn_init(void) 2608 { 2609 if (dn_tasks_started) 2610 return; 2611 2612 mtx_init(&sched_mtx, "dn_sched", NULL, MTX_DEF); 2613 2614 dn_tasks_started = 1; 2615 TASK_INIT(&dn_task, 0, dummynet_task, NULL); 2616 dn_tq = taskqueue_create_fast("dummynet", M_WAITOK, 2617 taskqueue_thread_enqueue, &dn_tq); 2618 taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); 2619 2620 CK_LIST_INIT(&schedlist); 2621 callout_init(&dn_timeout, 1); 2622 dn_reschedule(); 2623 } 2624 2625 static void 2626 ip_dn_destroy(int last) 2627 { 2628 /* ensure no more callouts are started */ 2629 dn_gone = 1; 2630 2631 /* check for last */ 2632 if (last) { 2633 ND("removing last instance\n"); 2634 ip_dn_ctl_ptr = NULL; 2635 ip_dn_io_ptr = NULL; 2636 } 2637 2638 callout_drain(&dn_timeout); 2639 taskqueue_drain(dn_tq, &dn_task); 2640 taskqueue_free(dn_tq); 2641 } 2642 2643 static int 2644 dummynet_modevent(module_t mod, int type, void *data) 2645 { 2646 2647 if (type == MOD_LOAD) { 2648 if (ip_dn_io_ptr) { 2649 printf("DUMMYNET already loaded\n"); 2650 return EEXIST ; 2651 } 2652 ip_dn_init(); 2653 ip_dn_ctl_ptr = ip_dn_ctl; 2654 ip_dn_io_ptr = dummynet_io; 2655 return 0; 2656 } else if (type == MOD_UNLOAD) { 2657 ip_dn_destroy(1 /* last */); 2658 return 0; 2659 } else 2660 return EOPNOTSUPP; 2661 } 2662 2663 /* modevent helpers for the modules */ 2664 static int 2665 load_dn_sched(struct dn_alg *d) 2666 { 2667 struct dn_alg *s; 2668 2669 if (d == NULL) 2670 return 1; /* error */ 2671 ip_dn_init(); /* just in case, we need the lock */ 2672 2673 /* Check that mandatory funcs exists */ 2674 if (d->enqueue == NULL || d->dequeue == NULL) { 2675 D("missing enqueue or dequeue for %s", d->name); 2676 return 1; 2677 } 2678 2679 /* Search if scheduler already exists */ 2680 mtx_lock(&sched_mtx); 2681 CK_LIST_FOREACH(s, &schedlist, next) { 2682 if (strcmp(s->name, d->name) == 0) { 2683 D("%s already loaded", d->name); 2684 break; /* scheduler already exists */ 2685 } 2686 } 2687 if (s == NULL) 2688 CK_LIST_INSERT_HEAD(&schedlist, d, next); 2689 mtx_unlock(&sched_mtx); 2690 D("dn_sched %s %sloaded", d->name, s ? "not ":""); 2691 return s ? 1 : 0; 2692 } 2693 2694 static int 2695 unload_dn_sched(struct dn_alg *s) 2696 { 2697 struct dn_alg *tmp, *r; 2698 int err = EINVAL; 2699 2700 ND("called for %s", s->name); 2701 2702 mtx_lock(&sched_mtx); 2703 CK_LIST_FOREACH_SAFE(r, &schedlist, next, tmp) { 2704 if (strcmp(s->name, r->name) != 0) 2705 continue; 2706 ND("ref_count = %d", r->ref_count); 2707 err = (r->ref_count != 0) ? EBUSY : 0; 2708 if (err == 0) 2709 CK_LIST_REMOVE(r, next); 2710 break; 2711 } 2712 mtx_unlock(&sched_mtx); 2713 NET_EPOCH_WAIT(); 2714 D("dn_sched %s %sunloaded", s->name, err ? "not ":""); 2715 return err; 2716 } 2717 2718 int 2719 dn_sched_modevent(module_t mod, int cmd, void *arg) 2720 { 2721 struct dn_alg *sch = arg; 2722 2723 if (cmd == MOD_LOAD) 2724 return load_dn_sched(sch); 2725 else if (cmd == MOD_UNLOAD) 2726 return unload_dn_sched(sch); 2727 else 2728 return EINVAL; 2729 } 2730 2731 static moduledata_t dummynet_mod = { 2732 "dummynet", dummynet_modevent, NULL 2733 }; 2734 2735 #define DN_SI_SUB SI_SUB_PROTO_FIREWALL 2736 #define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ 2737 DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); 2738 MODULE_DEPEND(dummynet, ipfw, 3, 3, 3); 2739 MODULE_VERSION(dummynet, 3); 2740 2741 /* 2742 * Starting up. Done in order after dummynet_modevent() has been called. 2743 * VNET_SYSINIT is also called for each existing vnet and each new vnet. 2744 */ 2745 VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_vnet_init, NULL); 2746 2747 /* 2748 * Shutdown handlers up shop. These are done in REVERSE ORDER, but still 2749 * after dummynet_modevent() has been called. Not called on reboot. 2750 * VNET_SYSUNINIT is also called for each exiting vnet as it exits. 2751 * or when the module is unloaded. 2752 */ 2753 VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_vnet_destroy, NULL); 2754 2755 #ifdef NEW_AQM 2756 2757 /* modevent helpers for the AQM modules */ 2758 static int 2759 load_dn_aqm(struct dn_aqm *d) 2760 { 2761 struct dn_aqm *aqm=NULL; 2762 2763 if (d == NULL) 2764 return 1; /* error */ 2765 ip_dn_init(); /* just in case, we need the lock */ 2766 2767 /* Check that mandatory funcs exists */ 2768 if (d->enqueue == NULL || d->dequeue == NULL) { 2769 D("missing enqueue or dequeue for %s", d->name); 2770 return 1; 2771 } 2772 2773 mtx_lock(&sched_mtx); 2774 2775 /* Search if AQM already exists */ 2776 CK_LIST_FOREACH(aqm, &aqmlist, next) { 2777 if (strcmp(aqm->name, d->name) == 0) { 2778 D("%s already loaded", d->name); 2779 break; /* AQM already exists */ 2780 } 2781 } 2782 if (aqm == NULL) 2783 CK_LIST_INSERT_HEAD(&aqmlist, d, next); 2784 2785 mtx_unlock(&sched_mtx); 2786 2787 D("dn_aqm %s %sloaded", d->name, aqm ? "not ":""); 2788 return aqm ? 1 : 0; 2789 } 2790 2791 /* Callback to clean up AQM status for queues connected to a flowset 2792 * and then deconfigure the flowset. 2793 * This function is called before an AQM module is unloaded 2794 */ 2795 static int 2796 fs_cleanup(void *_fs, void *arg) 2797 { 2798 struct dn_fsk *fs = _fs; 2799 uint32_t type = *(uint32_t *)arg; 2800 2801 if (fs->aqmfp && fs->aqmfp->type == type) 2802 aqm_cleanup_deconfig_fs(fs); 2803 2804 return 0; 2805 } 2806 2807 static int 2808 unload_dn_aqm(struct dn_aqm *aqm) 2809 { 2810 struct dn_aqm *tmp, *r; 2811 int err = EINVAL; 2812 err = 0; 2813 ND("called for %s", aqm->name); 2814 2815 /* clean up AQM status and deconfig flowset */ 2816 dn_ht_scan(V_dn_cfg.fshash, fs_cleanup, &aqm->type); 2817 2818 mtx_lock(&sched_mtx); 2819 2820 CK_LIST_FOREACH_SAFE(r, &aqmlist, next, tmp) { 2821 if (strcmp(aqm->name, r->name) != 0) 2822 continue; 2823 ND("ref_count = %d", r->ref_count); 2824 err = (r->ref_count != 0 || r->cfg_ref_count != 0) ? EBUSY : 0; 2825 if (err == 0) 2826 CK_LIST_REMOVE(r, next); 2827 break; 2828 } 2829 2830 mtx_unlock(&sched_mtx); 2831 NET_EPOCH_WAIT(); 2832 2833 D("%s %sunloaded", aqm->name, err ? "not ":""); 2834 if (err) 2835 D("ref_count=%d, cfg_ref_count=%d", r->ref_count, r->cfg_ref_count); 2836 return err; 2837 } 2838 2839 int 2840 dn_aqm_modevent(module_t mod, int cmd, void *arg) 2841 { 2842 struct dn_aqm *aqm = arg; 2843 2844 if (cmd == MOD_LOAD) 2845 return load_dn_aqm(aqm); 2846 else if (cmd == MOD_UNLOAD) 2847 return unload_dn_aqm(aqm); 2848 else 2849 return EINVAL; 2850 } 2851 #endif 2852 2853 /* end of file */ 2854