1 /*- 2 * Codel/FQ_Codel and PIE/FQ-PIE Code: 3 * Copyright (C) 2016 Centre for Advanced Internet Architectures, 4 * Swinburne University of Technology, Melbourne, Australia. 5 * Portions of this code were made possible in part by a gift from 6 * The Comcast Innovation Fund. 7 * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> 8 * 9 * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa 10 * Portions Copyright (c) 2000 Akamba Corp. 11 * All rights reserved 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 /* 39 * Configuration and internal object management for dummynet. 40 */ 41 42 #include "opt_inet6.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/malloc.h> 47 #include <sys/mbuf.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/module.h> 51 #include <sys/mutex.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/rwlock.h> 55 #include <sys/socket.h> 56 #include <sys/socketvar.h> 57 #include <sys/time.h> 58 #include <sys/taskqueue.h> 59 #include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ 60 #include <netinet/in.h> 61 #include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ 62 #include <netinet/ip_fw.h> 63 #include <netinet/ip_dummynet.h> 64 65 #include <netpfil/ipfw/ip_fw_private.h> 66 #include <netpfil/ipfw/dn_heap.h> 67 #include <netpfil/ipfw/ip_dn_private.h> 68 #ifdef NEW_AQM 69 #include <netpfil/ipfw/dn_aqm.h> 70 #endif 71 #include <netpfil/ipfw/dn_sched.h> 72 73 /* which objects to copy */ 74 #define DN_C_LINK 0x01 75 #define DN_C_SCH 0x02 76 #define DN_C_FLOW 0x04 77 #define DN_C_FS 0x08 78 #define DN_C_QUEUE 0x10 79 80 /* we use this argument in case of a schk_new */ 81 struct schk_new_arg { 82 struct dn_alg *fp; 83 struct dn_sch *sch; 84 }; 85 86 /*---- callout hooks. ----*/ 87 static struct callout dn_timeout; 88 static int dn_gone; 89 static struct task dn_task; 90 static struct taskqueue *dn_tq = NULL; 91 92 static void 93 dummynet(void *arg) 94 { 95 96 (void)arg; /* UNUSED */ 97 taskqueue_enqueue(dn_tq, &dn_task); 98 } 99 100 void 101 dn_reschedule(void) 102 { 103 104 if (dn_gone != 0) 105 return; 106 callout_reset_sbt(&dn_timeout, tick_sbt, 0, dummynet, NULL, 107 C_HARDCLOCK | C_DIRECT_EXEC); 108 } 109 /*----- end of callout hooks -----*/ 110 111 #ifdef NEW_AQM 112 /* Return AQM descriptor for given type or name. */ 113 static struct dn_aqm * 114 find_aqm_type(int type, char *name) 115 { 116 struct dn_aqm *d; 117 118 SLIST_FOREACH(d, &dn_cfg.aqmlist, next) { 119 if (d->type == type || (name && !strcasecmp(d->name, name))) 120 return d; 121 } 122 return NULL; /* not found */ 123 } 124 #endif 125 126 /* Return a scheduler descriptor given the type or name. */ 127 static struct dn_alg * 128 find_sched_type(int type, char *name) 129 { 130 struct dn_alg *d; 131 132 SLIST_FOREACH(d, &dn_cfg.schedlist, next) { 133 if (d->type == type || (name && !strcasecmp(d->name, name))) 134 return d; 135 } 136 return NULL; /* not found */ 137 } 138 139 int 140 ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) 141 { 142 int oldv = *v; 143 const char *op = NULL; 144 if (dflt < lo) 145 dflt = lo; 146 if (dflt > hi) 147 dflt = hi; 148 if (oldv < lo) { 149 *v = dflt; 150 op = "Bump"; 151 } else if (oldv > hi) { 152 *v = hi; 153 op = "Clamp"; 154 } else 155 return *v; 156 if (op && msg) 157 printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); 158 return *v; 159 } 160 161 /*---- flow_id mask, hash and compare functions ---*/ 162 /* 163 * The flow_id includes the 5-tuple, the queue/pipe number 164 * which we store in the extra area in host order, 165 * and for ipv6 also the flow_id6. 166 * XXX see if we want the tos byte (can store in 'flags') 167 */ 168 static struct ipfw_flow_id * 169 flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) 170 { 171 int is_v6 = IS_IP6_FLOW_ID(id); 172 173 id->dst_port &= mask->dst_port; 174 id->src_port &= mask->src_port; 175 id->proto &= mask->proto; 176 id->extra &= mask->extra; 177 if (is_v6) { 178 APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); 179 APPLY_MASK(&id->src_ip6, &mask->src_ip6); 180 id->flow_id6 &= mask->flow_id6; 181 } else { 182 id->dst_ip &= mask->dst_ip; 183 id->src_ip &= mask->src_ip; 184 } 185 return id; 186 } 187 188 /* computes an OR of two masks, result in dst and also returned */ 189 static struct ipfw_flow_id * 190 flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst) 191 { 192 int is_v6 = IS_IP6_FLOW_ID(dst); 193 194 dst->dst_port |= src->dst_port; 195 dst->src_port |= src->src_port; 196 dst->proto |= src->proto; 197 dst->extra |= src->extra; 198 if (is_v6) { 199 #define OR_MASK(_d, _s) \ 200 (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \ 201 (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \ 202 (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \ 203 (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3]; 204 OR_MASK(&dst->dst_ip6, &src->dst_ip6); 205 OR_MASK(&dst->src_ip6, &src->src_ip6); 206 #undef OR_MASK 207 dst->flow_id6 |= src->flow_id6; 208 } else { 209 dst->dst_ip |= src->dst_ip; 210 dst->src_ip |= src->src_ip; 211 } 212 return dst; 213 } 214 215 static int 216 nonzero_mask(struct ipfw_flow_id *m) 217 { 218 if (m->dst_port || m->src_port || m->proto || m->extra) 219 return 1; 220 if (IS_IP6_FLOW_ID(m)) { 221 return 222 m->dst_ip6.__u6_addr.__u6_addr32[0] || 223 m->dst_ip6.__u6_addr.__u6_addr32[1] || 224 m->dst_ip6.__u6_addr.__u6_addr32[2] || 225 m->dst_ip6.__u6_addr.__u6_addr32[3] || 226 m->src_ip6.__u6_addr.__u6_addr32[0] || 227 m->src_ip6.__u6_addr.__u6_addr32[1] || 228 m->src_ip6.__u6_addr.__u6_addr32[2] || 229 m->src_ip6.__u6_addr.__u6_addr32[3] || 230 m->flow_id6; 231 } else { 232 return m->dst_ip || m->src_ip; 233 } 234 } 235 236 /* XXX we may want a better hash function */ 237 static uint32_t 238 flow_id_hash(struct ipfw_flow_id *id) 239 { 240 uint32_t i; 241 242 if (IS_IP6_FLOW_ID(id)) { 243 uint32_t *d = (uint32_t *)&id->dst_ip6; 244 uint32_t *s = (uint32_t *)&id->src_ip6; 245 i = (d[0] ) ^ (d[1]) ^ 246 (d[2] ) ^ (d[3]) ^ 247 (d[0] >> 15) ^ (d[1] >> 15) ^ 248 (d[2] >> 15) ^ (d[3] >> 15) ^ 249 (s[0] << 1) ^ (s[1] << 1) ^ 250 (s[2] << 1) ^ (s[3] << 1) ^ 251 (s[0] << 16) ^ (s[1] << 16) ^ 252 (s[2] << 16) ^ (s[3] << 16) ^ 253 (id->dst_port << 1) ^ (id->src_port) ^ 254 (id->extra) ^ 255 (id->proto ) ^ (id->flow_id6); 256 } else { 257 i = (id->dst_ip) ^ (id->dst_ip >> 15) ^ 258 (id->src_ip << 1) ^ (id->src_ip >> 16) ^ 259 (id->extra) ^ 260 (id->dst_port << 1) ^ (id->src_port) ^ (id->proto); 261 } 262 return i; 263 } 264 265 /* Like bcmp, returns 0 if ids match, 1 otherwise. */ 266 static int 267 flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2) 268 { 269 int is_v6 = IS_IP6_FLOW_ID(id1); 270 271 if (!is_v6) { 272 if (IS_IP6_FLOW_ID(id2)) 273 return 1; /* different address families */ 274 275 return (id1->dst_ip == id2->dst_ip && 276 id1->src_ip == id2->src_ip && 277 id1->dst_port == id2->dst_port && 278 id1->src_port == id2->src_port && 279 id1->proto == id2->proto && 280 id1->extra == id2->extra) ? 0 : 1; 281 } 282 /* the ipv6 case */ 283 return ( 284 !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) && 285 !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) && 286 id1->dst_port == id2->dst_port && 287 id1->src_port == id2->src_port && 288 id1->proto == id2->proto && 289 id1->extra == id2->extra && 290 id1->flow_id6 == id2->flow_id6) ? 0 : 1; 291 } 292 /*--------- end of flow-id mask, hash and compare ---------*/ 293 294 /*--- support functions for the qht hashtable ---- 295 * Entries are hashed by flow-id 296 */ 297 static uint32_t 298 q_hash(uintptr_t key, int flags, void *arg) 299 { 300 /* compute the hash slot from the flow id */ 301 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 302 &((struct dn_queue *)key)->ni.fid : 303 (struct ipfw_flow_id *)key; 304 305 return flow_id_hash(id); 306 } 307 308 static int 309 q_match(void *obj, uintptr_t key, int flags, void *arg) 310 { 311 struct dn_queue *o = (struct dn_queue *)obj; 312 struct ipfw_flow_id *id2; 313 314 if (flags & DNHT_KEY_IS_OBJ) { 315 /* compare pointers */ 316 id2 = &((struct dn_queue *)key)->ni.fid; 317 } else { 318 id2 = (struct ipfw_flow_id *)key; 319 } 320 return (0 == flow_id_cmp(&o->ni.fid, id2)); 321 } 322 323 /* 324 * create a new queue instance for the given 'key'. 325 */ 326 static void * 327 q_new(uintptr_t key, int flags, void *arg) 328 { 329 struct dn_queue *q, *template = arg; 330 struct dn_fsk *fs = template->fs; 331 int size = sizeof(*q) + fs->sched->fp->q_datalen; 332 333 q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO); 334 if (q == NULL) { 335 D("no memory for new queue"); 336 return NULL; 337 } 338 339 set_oid(&q->ni.oid, DN_QUEUE, size); 340 if (fs->fs.flags & DN_QHT_HASH) 341 q->ni.fid = *(struct ipfw_flow_id *)key; 342 q->fs = fs; 343 q->_si = template->_si; 344 q->_si->q_count++; 345 346 if (fs->sched->fp->new_queue) 347 fs->sched->fp->new_queue(q); 348 349 #ifdef NEW_AQM 350 /* call AQM init function after creating a queue*/ 351 if (fs->aqmfp && fs->aqmfp->init) 352 if(fs->aqmfp->init(q)) 353 D("unable to init AQM for fs %d", fs->fs.fs_nr); 354 #endif 355 dn_cfg.queue_count++; 356 357 return q; 358 } 359 360 /* 361 * Notify schedulers that a queue is going away. 362 * If (flags & DN_DESTROY), also free the packets. 363 * The version for callbacks is called q_delete_cb(). 364 */ 365 static void 366 dn_delete_queue(struct dn_queue *q, int flags) 367 { 368 struct dn_fsk *fs = q->fs; 369 370 #ifdef NEW_AQM 371 /* clean up AQM status for queue 'q' 372 * cleanup here is called just with MULTIQUEUE 373 */ 374 if (fs && fs->aqmfp && fs->aqmfp->cleanup) 375 fs->aqmfp->cleanup(q); 376 #endif 377 // D("fs %p si %p\n", fs, q->_si); 378 /* notify the parent scheduler that the queue is going away */ 379 if (fs && fs->sched->fp->free_queue) 380 fs->sched->fp->free_queue(q); 381 q->_si->q_count--; 382 q->_si = NULL; 383 if (flags & DN_DESTROY) { 384 if (q->mq.head) 385 dn_free_pkts(q->mq.head); 386 bzero(q, sizeof(*q)); // safety 387 free(q, M_DUMMYNET); 388 dn_cfg.queue_count--; 389 } 390 } 391 392 static int 393 q_delete_cb(void *q, void *arg) 394 { 395 int flags = (int)(uintptr_t)arg; 396 dn_delete_queue(q, flags); 397 return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0; 398 } 399 400 /* 401 * calls dn_delete_queue/q_delete_cb on all queues, 402 * which notifies the parent scheduler and possibly drains packets. 403 * flags & DN_DESTROY: drains queues and destroy qht; 404 */ 405 static void 406 qht_delete(struct dn_fsk *fs, int flags) 407 { 408 ND("fs %d start flags %d qht %p", 409 fs->fs.fs_nr, flags, fs->qht); 410 if (!fs->qht) 411 return; 412 if (fs->fs.flags & DN_QHT_HASH) { 413 dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags); 414 if (flags & DN_DESTROY) { 415 dn_ht_free(fs->qht, 0); 416 fs->qht = NULL; 417 } 418 } else { 419 dn_delete_queue((struct dn_queue *)(fs->qht), flags); 420 if (flags & DN_DESTROY) 421 fs->qht = NULL; 422 } 423 } 424 425 /* 426 * Find and possibly create the queue for a MULTIQUEUE scheduler. 427 * We never call it for !MULTIQUEUE (the queue is in the sch_inst). 428 */ 429 struct dn_queue * 430 ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si, 431 struct ipfw_flow_id *id) 432 { 433 struct dn_queue template; 434 435 template._si = si; 436 template.fs = fs; 437 438 if (fs->fs.flags & DN_QHT_HASH) { 439 struct ipfw_flow_id masked_id; 440 if (fs->qht == NULL) { 441 fs->qht = dn_ht_init(NULL, fs->fs.buckets, 442 offsetof(struct dn_queue, q_next), 443 q_hash, q_match, q_new); 444 if (fs->qht == NULL) 445 return NULL; 446 } 447 masked_id = *id; 448 flow_id_mask(&fs->fsk_mask, &masked_id); 449 return dn_ht_find(fs->qht, (uintptr_t)&masked_id, 450 DNHT_INSERT, &template); 451 } else { 452 if (fs->qht == NULL) 453 fs->qht = q_new(0, 0, &template); 454 return (struct dn_queue *)fs->qht; 455 } 456 } 457 /*--- end of queue hash table ---*/ 458 459 /*--- support functions for the sch_inst hashtable ---- 460 * 461 * These are hashed by flow-id 462 */ 463 static uint32_t 464 si_hash(uintptr_t key, int flags, void *arg) 465 { 466 /* compute the hash slot from the flow id */ 467 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 468 &((struct dn_sch_inst *)key)->ni.fid : 469 (struct ipfw_flow_id *)key; 470 471 return flow_id_hash(id); 472 } 473 474 static int 475 si_match(void *obj, uintptr_t key, int flags, void *arg) 476 { 477 struct dn_sch_inst *o = obj; 478 struct ipfw_flow_id *id2; 479 480 id2 = (flags & DNHT_KEY_IS_OBJ) ? 481 &((struct dn_sch_inst *)key)->ni.fid : 482 (struct ipfw_flow_id *)key; 483 return flow_id_cmp(&o->ni.fid, id2) == 0; 484 } 485 486 /* 487 * create a new instance for the given 'key' 488 * Allocate memory for instance, delay line and scheduler private data. 489 */ 490 static void * 491 si_new(uintptr_t key, int flags, void *arg) 492 { 493 struct dn_schk *s = arg; 494 struct dn_sch_inst *si; 495 int l = sizeof(*si) + s->fp->si_datalen; 496 497 si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); 498 if (si == NULL) 499 goto error; 500 501 /* Set length only for the part passed up to userland. */ 502 set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow)); 503 set_oid(&(si->dline.oid), DN_DELAY_LINE, 504 sizeof(struct delay_line)); 505 /* mark si and dline as outside the event queue */ 506 si->ni.oid.id = si->dline.oid.id = -1; 507 508 si->sched = s; 509 si->dline.si = si; 510 511 if (s->fp->new_sched && s->fp->new_sched(si)) { 512 D("new_sched error"); 513 goto error; 514 } 515 if (s->sch.flags & DN_HAVE_MASK) 516 si->ni.fid = *(struct ipfw_flow_id *)key; 517 518 #ifdef NEW_AQM 519 /* init AQM status for !DN_MULTIQUEUE sched*/ 520 if (!(s->fp->flags & DN_MULTIQUEUE)) 521 if (s->fs->aqmfp && s->fs->aqmfp->init) 522 if(s->fs->aqmfp->init((struct dn_queue *)(si + 1))) { 523 D("unable to init AQM for fs %d", s->fs->fs.fs_nr); 524 goto error; 525 } 526 #endif 527 528 dn_cfg.si_count++; 529 return si; 530 531 error: 532 if (si) { 533 bzero(si, sizeof(*si)); // safety 534 free(si, M_DUMMYNET); 535 } 536 return NULL; 537 } 538 539 /* 540 * Callback from siht to delete all scheduler instances. Remove 541 * si and delay line from the system heap, destroy all queues. 542 * We assume that all flowset have been notified and do not 543 * point to us anymore. 544 */ 545 static int 546 si_destroy(void *_si, void *arg) 547 { 548 struct dn_sch_inst *si = _si; 549 struct dn_schk *s = si->sched; 550 struct delay_line *dl = &si->dline; 551 552 if (dl->oid.subtype) /* remove delay line from event heap */ 553 heap_extract(&dn_cfg.evheap, dl); 554 dn_free_pkts(dl->mq.head); /* drain delay line */ 555 if (si->kflags & DN_ACTIVE) /* remove si from event heap */ 556 heap_extract(&dn_cfg.evheap, si); 557 558 #ifdef NEW_AQM 559 /* clean up AQM status for !DN_MULTIQUEUE sched 560 * Note that all queues belong to fs were cleaned up in fsk_detach. 561 * When drain_scheduler is called s->fs and q->fs are pointing 562 * to a correct fs, so we can use fs in this case. 563 */ 564 if (!(s->fp->flags & DN_MULTIQUEUE)) { 565 struct dn_queue *q = (struct dn_queue *)(si + 1); 566 if (q->aqm_status && q->fs->aqmfp) 567 if (q->fs->aqmfp->cleanup) 568 q->fs->aqmfp->cleanup(q); 569 } 570 #endif 571 if (s->fp->free_sched) 572 s->fp->free_sched(si); 573 bzero(si, sizeof(*si)); /* safety */ 574 free(si, M_DUMMYNET); 575 dn_cfg.si_count--; 576 return DNHT_SCAN_DEL; 577 } 578 579 /* 580 * Find the scheduler instance for this packet. If we need to apply 581 * a mask, do on a local copy of the flow_id to preserve the original. 582 * Assume siht is always initialized if we have a mask. 583 */ 584 struct dn_sch_inst * 585 ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id) 586 { 587 588 if (s->sch.flags & DN_HAVE_MASK) { 589 struct ipfw_flow_id id_t = *id; 590 flow_id_mask(&s->sch.sched_mask, &id_t); 591 return dn_ht_find(s->siht, (uintptr_t)&id_t, 592 DNHT_INSERT, s); 593 } 594 if (!s->siht) 595 s->siht = si_new(0, 0, s); 596 return (struct dn_sch_inst *)s->siht; 597 } 598 599 /* callback to flush credit for the scheduler instance */ 600 static int 601 si_reset_credit(void *_si, void *arg) 602 { 603 struct dn_sch_inst *si = _si; 604 struct dn_link *p = &si->sched->link; 605 606 si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0); 607 return 0; 608 } 609 610 static void 611 schk_reset_credit(struct dn_schk *s) 612 { 613 if (s->sch.flags & DN_HAVE_MASK) 614 dn_ht_scan(s->siht, si_reset_credit, NULL); 615 else if (s->siht) 616 si_reset_credit(s->siht, NULL); 617 } 618 /*---- end of sch_inst hashtable ---------------------*/ 619 620 /*------------------------------------------------------- 621 * flowset hash (fshash) support. Entries are hashed by fs_nr. 622 * New allocations are put in the fsunlinked list, from which 623 * they are removed when they point to a specific scheduler. 624 */ 625 static uint32_t 626 fsk_hash(uintptr_t key, int flags, void *arg) 627 { 628 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 629 ((struct dn_fsk *)key)->fs.fs_nr; 630 631 return ( (i>>8)^(i>>4)^i ); 632 } 633 634 static int 635 fsk_match(void *obj, uintptr_t key, int flags, void *arg) 636 { 637 struct dn_fsk *fs = obj; 638 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 639 ((struct dn_fsk *)key)->fs.fs_nr; 640 641 return (fs->fs.fs_nr == i); 642 } 643 644 static void * 645 fsk_new(uintptr_t key, int flags, void *arg) 646 { 647 struct dn_fsk *fs; 648 649 fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO); 650 if (fs) { 651 set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs)); 652 dn_cfg.fsk_count++; 653 fs->drain_bucket = 0; 654 SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); 655 } 656 return fs; 657 } 658 659 #ifdef NEW_AQM 660 /* callback function for cleaning up AQM queue status belongs to a flowset 661 * connected to scheduler instance '_si' (for !DN_MULTIQUEUE only). 662 */ 663 static int 664 si_cleanup_q(void *_si, void *arg) 665 { 666 struct dn_sch_inst *si = _si; 667 668 if (!(si->sched->fp->flags & DN_MULTIQUEUE)) { 669 if (si->sched->fs->aqmfp && si->sched->fs->aqmfp->cleanup) 670 si->sched->fs->aqmfp->cleanup((struct dn_queue *) (si+1)); 671 } 672 return 0; 673 } 674 675 /* callback to clean up queue AQM status.*/ 676 static int 677 q_cleanup_q(void *_q, void *arg) 678 { 679 struct dn_queue *q = _q; 680 q->fs->aqmfp->cleanup(q); 681 return 0; 682 } 683 684 /* Clean up all AQM queues status belongs to flowset 'fs' and then 685 * deconfig AQM for flowset 'fs' 686 */ 687 static void 688 aqm_cleanup_deconfig_fs(struct dn_fsk *fs) 689 { 690 struct dn_sch_inst *si; 691 692 /* clean up AQM status for all queues for !DN_MULTIQUEUE sched*/ 693 if (fs->fs.fs_nr > DN_MAX_ID) { 694 if (fs->sched && !(fs->sched->fp->flags & DN_MULTIQUEUE)) { 695 if (fs->sched->sch.flags & DN_HAVE_MASK) 696 dn_ht_scan(fs->sched->siht, si_cleanup_q, NULL); 697 else { 698 /* single si i.e. no sched mask */ 699 si = (struct dn_sch_inst *) fs->sched->siht; 700 if (si && fs->aqmfp && fs->aqmfp->cleanup) 701 fs->aqmfp->cleanup((struct dn_queue *) (si+1)); 702 } 703 } 704 } 705 706 /* clean up AQM status for all queues for DN_MULTIQUEUE sched*/ 707 if (fs->sched && fs->sched->fp->flags & DN_MULTIQUEUE && fs->qht) { 708 if (fs->fs.flags & DN_QHT_HASH) 709 dn_ht_scan(fs->qht, q_cleanup_q, NULL); 710 else 711 fs->aqmfp->cleanup((struct dn_queue *)(fs->qht)); 712 } 713 714 /* deconfig AQM */ 715 if(fs->aqmcfg && fs->aqmfp && fs->aqmfp->deconfig) 716 fs->aqmfp->deconfig(fs); 717 } 718 #endif 719 720 /* 721 * detach flowset from its current scheduler. Flags as follows: 722 * DN_DETACH removes from the fsk_list 723 * DN_DESTROY deletes individual queues 724 * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked). 725 */ 726 static void 727 fsk_detach(struct dn_fsk *fs, int flags) 728 { 729 if (flags & DN_DELETE_FS) 730 flags |= DN_DESTROY; 731 ND("fs %d from sched %d flags %s %s %s", 732 fs->fs.fs_nr, fs->fs.sched_nr, 733 (flags & DN_DELETE_FS) ? "DEL_FS":"", 734 (flags & DN_DESTROY) ? "DEL":"", 735 (flags & DN_DETACH) ? "DET":""); 736 if (flags & DN_DETACH) { /* detach from the list */ 737 struct dn_fsk_head *h; 738 h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu; 739 SLIST_REMOVE(h, fs, dn_fsk, sch_chain); 740 } 741 /* Free the RED parameters, they will be recomputed on 742 * subsequent attach if needed. 743 */ 744 if (fs->w_q_lookup) 745 free(fs->w_q_lookup, M_DUMMYNET); 746 fs->w_q_lookup = NULL; 747 qht_delete(fs, flags); 748 #ifdef NEW_AQM 749 aqm_cleanup_deconfig_fs(fs); 750 #endif 751 752 if (fs->sched && fs->sched->fp->free_fsk) 753 fs->sched->fp->free_fsk(fs); 754 fs->sched = NULL; 755 if (flags & DN_DELETE_FS) { 756 bzero(fs, sizeof(*fs)); /* safety */ 757 free(fs, M_DUMMYNET); 758 dn_cfg.fsk_count--; 759 } else { 760 SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); 761 } 762 } 763 764 /* 765 * Detach or destroy all flowsets in a list. 766 * flags specifies what to do: 767 * DN_DESTROY: flush all queues 768 * DN_DELETE_FS: DN_DESTROY + destroy flowset 769 * DN_DELETE_FS implies DN_DESTROY 770 */ 771 static void 772 fsk_detach_list(struct dn_fsk_head *h, int flags) 773 { 774 struct dn_fsk *fs; 775 int n = 0; /* only for stats */ 776 777 ND("head %p flags %x", h, flags); 778 while ((fs = SLIST_FIRST(h))) { 779 SLIST_REMOVE_HEAD(h, sch_chain); 780 n++; 781 fsk_detach(fs, flags); 782 } 783 ND("done %d flowsets", n); 784 } 785 786 /* 787 * called on 'queue X delete' -- removes the flowset from fshash, 788 * deletes all queues for the flowset, and removes the flowset. 789 */ 790 static int 791 delete_fs(int i, int locked) 792 { 793 struct dn_fsk *fs; 794 int err = 0; 795 796 if (!locked) 797 DN_BH_WLOCK(); 798 fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL); 799 ND("fs %d found %p", i, fs); 800 if (fs) { 801 fsk_detach(fs, DN_DETACH | DN_DELETE_FS); 802 err = 0; 803 } else 804 err = EINVAL; 805 if (!locked) 806 DN_BH_WUNLOCK(); 807 return err; 808 } 809 810 /*----- end of flowset hashtable support -------------*/ 811 812 /*------------------------------------------------------------ 813 * Scheduler hash. When searching by index we pass sched_nr, 814 * otherwise we pass struct dn_sch * which is the first field in 815 * struct dn_schk so we can cast between the two. We use this trick 816 * because in the create phase (but it should be fixed). 817 */ 818 static uint32_t 819 schk_hash(uintptr_t key, int flags, void *_arg) 820 { 821 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 822 ((struct dn_schk *)key)->sch.sched_nr; 823 return ( (i>>8)^(i>>4)^i ); 824 } 825 826 static int 827 schk_match(void *obj, uintptr_t key, int flags, void *_arg) 828 { 829 struct dn_schk *s = (struct dn_schk *)obj; 830 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 831 ((struct dn_schk *)key)->sch.sched_nr; 832 return (s->sch.sched_nr == i); 833 } 834 835 /* 836 * Create the entry and intialize with the sched hash if needed. 837 * Leave s->fp unset so we can tell whether a dn_ht_find() returns 838 * a new object or a previously existing one. 839 */ 840 static void * 841 schk_new(uintptr_t key, int flags, void *arg) 842 { 843 struct schk_new_arg *a = arg; 844 struct dn_schk *s; 845 int l = sizeof(*s) +a->fp->schk_datalen; 846 847 s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); 848 if (s == NULL) 849 return NULL; 850 set_oid(&s->link.oid, DN_LINK, sizeof(s->link)); 851 s->sch = *a->sch; // copy initial values 852 s->link.link_nr = s->sch.sched_nr; 853 SLIST_INIT(&s->fsk_list); 854 /* initialize the hash table or create the single instance */ 855 s->fp = a->fp; /* si_new needs this */ 856 s->drain_bucket = 0; 857 if (s->sch.flags & DN_HAVE_MASK) { 858 s->siht = dn_ht_init(NULL, s->sch.buckets, 859 offsetof(struct dn_sch_inst, si_next), 860 si_hash, si_match, si_new); 861 if (s->siht == NULL) { 862 free(s, M_DUMMYNET); 863 return NULL; 864 } 865 } 866 s->fp = NULL; /* mark as a new scheduler */ 867 dn_cfg.schk_count++; 868 return s; 869 } 870 871 /* 872 * Callback for sched delete. Notify all attached flowsets to 873 * detach from the scheduler, destroy the internal flowset, and 874 * all instances. The scheduler goes away too. 875 * arg is 0 (only detach flowsets and destroy instances) 876 * DN_DESTROY (detach & delete queues, delete schk) 877 * or DN_DELETE_FS (delete queues and flowsets, delete schk) 878 */ 879 static int 880 schk_delete_cb(void *obj, void *arg) 881 { 882 struct dn_schk *s = obj; 883 #if 0 884 int a = (int)arg; 885 ND("sched %d arg %s%s", 886 s->sch.sched_nr, 887 a&DN_DESTROY ? "DEL ":"", 888 a&DN_DELETE_FS ? "DEL_FS":""); 889 #endif 890 fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0); 891 /* no more flowset pointing to us now */ 892 if (s->sch.flags & DN_HAVE_MASK) { 893 dn_ht_scan(s->siht, si_destroy, NULL); 894 dn_ht_free(s->siht, 0); 895 } else if (s->siht) 896 si_destroy(s->siht, NULL); 897 if (s->profile) { 898 free(s->profile, M_DUMMYNET); 899 s->profile = NULL; 900 } 901 s->siht = NULL; 902 if (s->fp->destroy) 903 s->fp->destroy(s); 904 bzero(s, sizeof(*s)); // safety 905 free(obj, M_DUMMYNET); 906 dn_cfg.schk_count--; 907 return DNHT_SCAN_DEL; 908 } 909 910 /* 911 * called on a 'sched X delete' command. Deletes a single scheduler. 912 * This is done by removing from the schedhash, unlinking all 913 * flowsets and deleting their traffic. 914 */ 915 static int 916 delete_schk(int i) 917 { 918 struct dn_schk *s; 919 920 s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL); 921 ND("%d %p", i, s); 922 if (!s) 923 return EINVAL; 924 delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */ 925 /* then detach flowsets, delete traffic */ 926 schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY); 927 return 0; 928 } 929 /*--- end of schk hashtable support ---*/ 930 931 static int 932 copy_obj(char **start, char *end, void *_o, const char *msg, int i) 933 { 934 struct dn_id o; 935 union { 936 struct dn_link l; 937 struct dn_schk s; 938 } dn; 939 int have = end - *start; 940 941 memcpy(&o, _o, sizeof(o)); 942 if (have < o.len || o.len == 0 || o.type == 0) { 943 D("(WARN) type %d %s %d have %d need %d", 944 o.type, msg, i, have, o.len); 945 return 1; 946 } 947 ND("type %d %s %d len %d", o.type, msg, i, o.len); 948 if (o.type == DN_LINK) { 949 memcpy(&dn.l, _o, sizeof(dn.l)); 950 /* Adjust burst parameter for link */ 951 dn.l.burst = div64(dn.l.burst, 8 * hz); 952 dn.l.delay = dn.l.delay * 1000 / hz; 953 memcpy(*start, &dn.l, sizeof(dn.l)); 954 } else if (o.type == DN_SCH) { 955 /* Set dn.s.sch.oid.id to the number of instances */ 956 memcpy(&dn.s, _o, sizeof(dn.s)); 957 dn.s.sch.oid.id = (dn.s.sch.flags & DN_HAVE_MASK) ? 958 dn_ht_entries(dn.s.siht) : (dn.s.siht ? 1 : 0); 959 memcpy(*start, &dn.s, sizeof(dn.s)); 960 } else 961 memcpy(*start, _o, o.len); 962 *start += o.len; 963 return 0; 964 } 965 966 /* Specific function to copy a queue. 967 * Copies only the user-visible part of a queue (which is in 968 * a struct dn_flow), and sets len accordingly. 969 */ 970 static int 971 copy_obj_q(char **start, char *end, void *_o, const char *msg, int i) 972 { 973 struct dn_id *o = _o; 974 int have = end - *start; 975 int len = sizeof(struct dn_flow); /* see above comment */ 976 977 if (have < len || o->len == 0 || o->type != DN_QUEUE) { 978 D("ERROR type %d %s %d have %d need %d", 979 o->type, msg, i, have, len); 980 return 1; 981 } 982 ND("type %d %s %d len %d", o->type, msg, i, len); 983 memcpy(*start, _o, len); 984 ((struct dn_id*)(*start))->len = len; 985 *start += len; 986 return 0; 987 } 988 989 static int 990 copy_q_cb(void *obj, void *arg) 991 { 992 struct dn_queue *q = obj; 993 struct copy_args *a = arg; 994 struct dn_flow *ni = (struct dn_flow *)(*a->start); 995 if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1)) 996 return DNHT_SCAN_END; 997 ni->oid.type = DN_FLOW; /* override the DN_QUEUE */ 998 ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL); 999 return 0; 1000 } 1001 1002 static int 1003 copy_q(struct copy_args *a, struct dn_fsk *fs, int flags) 1004 { 1005 if (!fs->qht) 1006 return 0; 1007 if (fs->fs.flags & DN_QHT_HASH) 1008 dn_ht_scan(fs->qht, copy_q_cb, a); 1009 else 1010 copy_q_cb(fs->qht, a); 1011 return 0; 1012 } 1013 1014 /* 1015 * This routine only copies the initial part of a profile ? XXX 1016 */ 1017 static int 1018 copy_profile(struct copy_args *a, struct dn_profile *p) 1019 { 1020 int have = a->end - *a->start; 1021 /* XXX here we check for max length */ 1022 int profile_len = sizeof(struct dn_profile) - 1023 ED_MAX_SAMPLES_NO*sizeof(int); 1024 1025 if (p == NULL) 1026 return 0; 1027 if (have < profile_len) { 1028 D("error have %d need %d", have, profile_len); 1029 return 1; 1030 } 1031 memcpy(*a->start, p, profile_len); 1032 ((struct dn_id *)(*a->start))->len = profile_len; 1033 *a->start += profile_len; 1034 return 0; 1035 } 1036 1037 static int 1038 copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags) 1039 { 1040 struct dn_fs *ufs = (struct dn_fs *)(*a->start); 1041 if (!fs) 1042 return 0; 1043 ND("flowset %d", fs->fs.fs_nr); 1044 if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr)) 1045 return DNHT_SCAN_END; 1046 ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ? 1047 dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0); 1048 if (flags) { /* copy queues */ 1049 copy_q(a, fs, 0); 1050 } 1051 return 0; 1052 } 1053 1054 static int 1055 copy_si_cb(void *obj, void *arg) 1056 { 1057 struct dn_sch_inst *si = obj; 1058 struct copy_args *a = arg; 1059 struct dn_flow *ni = (struct dn_flow *)(*a->start); 1060 if (copy_obj(a->start, a->end, &si->ni, "inst", 1061 si->sched->sch.sched_nr)) 1062 return DNHT_SCAN_END; 1063 ni->oid.type = DN_FLOW; /* override the DN_SCH_I */ 1064 ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL); 1065 return 0; 1066 } 1067 1068 static int 1069 copy_si(struct copy_args *a, struct dn_schk *s, int flags) 1070 { 1071 if (s->sch.flags & DN_HAVE_MASK) 1072 dn_ht_scan(s->siht, copy_si_cb, a); 1073 else if (s->siht) 1074 copy_si_cb(s->siht, a); 1075 return 0; 1076 } 1077 1078 /* 1079 * compute a list of children of a scheduler and copy up 1080 */ 1081 static int 1082 copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags) 1083 { 1084 struct dn_fsk *fs; 1085 struct dn_id *o; 1086 uint32_t *p; 1087 1088 int n = 0, space = sizeof(*o); 1089 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 1090 if (fs->fs.fs_nr < DN_MAX_ID) 1091 n++; 1092 } 1093 space += n * sizeof(uint32_t); 1094 DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n); 1095 if (a->end - *(a->start) < space) 1096 return DNHT_SCAN_END; 1097 o = (struct dn_id *)(*(a->start)); 1098 o->len = space; 1099 *a->start += o->len; 1100 o->type = DN_TEXT; 1101 p = (uint32_t *)(o+1); 1102 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) 1103 if (fs->fs.fs_nr < DN_MAX_ID) 1104 *p++ = fs->fs.fs_nr; 1105 return 0; 1106 } 1107 1108 static int 1109 copy_data_helper(void *_o, void *_arg) 1110 { 1111 struct copy_args *a = _arg; 1112 uint32_t *r = a->extra->r; /* start of first range */ 1113 uint32_t *lim; /* first invalid pointer */ 1114 int n; 1115 1116 lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len); 1117 1118 if (a->type == DN_LINK || a->type == DN_SCH) { 1119 /* pipe|sched show, we receive a dn_schk */ 1120 struct dn_schk *s = _o; 1121 1122 n = s->sch.sched_nr; 1123 if (a->type == DN_SCH && n >= DN_MAX_ID) 1124 return 0; /* not a scheduler */ 1125 if (a->type == DN_LINK && n <= DN_MAX_ID) 1126 return 0; /* not a pipe */ 1127 1128 /* see if the object is within one of our ranges */ 1129 for (;r < lim; r += 2) { 1130 if (n < r[0] || n > r[1]) 1131 continue; 1132 /* Found a valid entry, copy and we are done */ 1133 if (a->flags & DN_C_LINK) { 1134 if (copy_obj(a->start, a->end, 1135 &s->link, "link", n)) 1136 return DNHT_SCAN_END; 1137 if (copy_profile(a, s->profile)) 1138 return DNHT_SCAN_END; 1139 if (copy_flowset(a, s->fs, 0)) 1140 return DNHT_SCAN_END; 1141 } 1142 if (a->flags & DN_C_SCH) { 1143 if (copy_obj(a->start, a->end, 1144 &s->sch, "sched", n)) 1145 return DNHT_SCAN_END; 1146 /* list all attached flowsets */ 1147 if (copy_fsk_list(a, s, 0)) 1148 return DNHT_SCAN_END; 1149 } 1150 if (a->flags & DN_C_FLOW) 1151 copy_si(a, s, 0); 1152 break; 1153 } 1154 } else if (a->type == DN_FS) { 1155 /* queue show, skip internal flowsets */ 1156 struct dn_fsk *fs = _o; 1157 1158 n = fs->fs.fs_nr; 1159 if (n >= DN_MAX_ID) 1160 return 0; 1161 /* see if the object is within one of our ranges */ 1162 for (;r < lim; r += 2) { 1163 if (n < r[0] || n > r[1]) 1164 continue; 1165 if (copy_flowset(a, fs, 0)) 1166 return DNHT_SCAN_END; 1167 copy_q(a, fs, 0); 1168 break; /* we are done */ 1169 } 1170 } 1171 return 0; 1172 } 1173 1174 static inline struct dn_schk * 1175 locate_scheduler(int i) 1176 { 1177 return dn_ht_find(dn_cfg.schedhash, i, 0, NULL); 1178 } 1179 1180 /* 1181 * red parameters are in fixed point arithmetic. 1182 */ 1183 static int 1184 config_red(struct dn_fsk *fs) 1185 { 1186 int64_t s, idle, weight, w0; 1187 int t, i; 1188 1189 fs->w_q = fs->fs.w_q; 1190 fs->max_p = fs->fs.max_p; 1191 ND("called"); 1192 /* Doing stuff that was in userland */ 1193 i = fs->sched->link.bandwidth; 1194 s = (i <= 0) ? 0 : 1195 hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i; 1196 1197 idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */ 1198 fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth); 1199 /* fs->lookup_step not scaled, */ 1200 if (!fs->lookup_step) 1201 fs->lookup_step = 1; 1202 w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled 1203 1204 for (t = fs->lookup_step; t > 1; --t) 1205 weight = SCALE_MUL(weight, w0); 1206 fs->lookup_weight = (int)(weight); // scaled 1207 1208 /* Now doing stuff that was in kerneland */ 1209 fs->min_th = SCALE(fs->fs.min_th); 1210 fs->max_th = SCALE(fs->fs.max_th); 1211 1212 if (fs->fs.max_th == fs->fs.min_th) 1213 fs->c_1 = fs->max_p; 1214 else 1215 fs->c_1 = SCALE((int64_t)(fs->max_p)) / (fs->fs.max_th - fs->fs.min_th); 1216 fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th)); 1217 1218 if (fs->fs.flags & DN_IS_GENTLE_RED) { 1219 fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th; 1220 fs->c_4 = SCALE(1) - 2 * fs->max_p; 1221 } 1222 1223 /* If the lookup table already exist, free and create it again. */ 1224 if (fs->w_q_lookup) { 1225 free(fs->w_q_lookup, M_DUMMYNET); 1226 fs->w_q_lookup = NULL; 1227 } 1228 if (dn_cfg.red_lookup_depth == 0) { 1229 printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth" 1230 "must be > 0\n"); 1231 fs->fs.flags &= ~DN_IS_RED; 1232 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1233 return (EINVAL); 1234 } 1235 fs->lookup_depth = dn_cfg.red_lookup_depth; 1236 fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int), 1237 M_DUMMYNET, M_NOWAIT); 1238 if (fs->w_q_lookup == NULL) { 1239 printf("dummynet: sorry, cannot allocate red lookup table\n"); 1240 fs->fs.flags &= ~DN_IS_RED; 1241 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1242 return(ENOSPC); 1243 } 1244 1245 /* Fill the lookup table with (1 - w_q)^x */ 1246 fs->w_q_lookup[0] = SCALE(1) - fs->w_q; 1247 1248 for (i = 1; i < fs->lookup_depth; i++) 1249 fs->w_q_lookup[i] = 1250 SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight); 1251 1252 if (dn_cfg.red_avg_pkt_size < 1) 1253 dn_cfg.red_avg_pkt_size = 512; 1254 fs->avg_pkt_size = dn_cfg.red_avg_pkt_size; 1255 if (dn_cfg.red_max_pkt_size < 1) 1256 dn_cfg.red_max_pkt_size = 1500; 1257 fs->max_pkt_size = dn_cfg.red_max_pkt_size; 1258 ND("exit"); 1259 return 0; 1260 } 1261 1262 /* Scan all flowset attached to this scheduler and update red */ 1263 static void 1264 update_red(struct dn_schk *s) 1265 { 1266 struct dn_fsk *fs; 1267 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 1268 if (fs && (fs->fs.flags & DN_IS_RED)) 1269 config_red(fs); 1270 } 1271 } 1272 1273 /* attach flowset to scheduler s, possibly requeue */ 1274 static void 1275 fsk_attach(struct dn_fsk *fs, struct dn_schk *s) 1276 { 1277 ND("remove fs %d from fsunlinked, link to sched %d", 1278 fs->fs.fs_nr, s->sch.sched_nr); 1279 SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain); 1280 fs->sched = s; 1281 SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain); 1282 if (s->fp->new_fsk) 1283 s->fp->new_fsk(fs); 1284 /* XXX compute fsk_mask */ 1285 fs->fsk_mask = fs->fs.flow_mask; 1286 if (fs->sched->sch.flags & DN_HAVE_MASK) 1287 flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask); 1288 if (fs->qht) { 1289 /* 1290 * we must drain qht according to the old 1291 * type, and reinsert according to the new one. 1292 * The requeue is complex -- in general we need to 1293 * reclassify every single packet. 1294 * For the time being, let's hope qht is never set 1295 * when we reach this point. 1296 */ 1297 D("XXX TODO requeue from fs %d to sch %d", 1298 fs->fs.fs_nr, s->sch.sched_nr); 1299 fs->qht = NULL; 1300 } 1301 /* set the new type for qht */ 1302 if (nonzero_mask(&fs->fsk_mask)) 1303 fs->fs.flags |= DN_QHT_HASH; 1304 else 1305 fs->fs.flags &= ~DN_QHT_HASH; 1306 1307 /* XXX config_red() can fail... */ 1308 if (fs->fs.flags & DN_IS_RED) 1309 config_red(fs); 1310 } 1311 1312 /* update all flowsets which may refer to this scheduler */ 1313 static void 1314 update_fs(struct dn_schk *s) 1315 { 1316 struct dn_fsk *fs, *tmp; 1317 1318 SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) { 1319 if (s->sch.sched_nr != fs->fs.sched_nr) { 1320 D("fs %d for sch %d not %d still unlinked", 1321 fs->fs.fs_nr, fs->fs.sched_nr, 1322 s->sch.sched_nr); 1323 continue; 1324 } 1325 fsk_attach(fs, s); 1326 } 1327 } 1328 1329 #ifdef NEW_AQM 1330 /* Retrieve AQM configurations to ipfw userland 1331 */ 1332 static int 1333 get_aqm_parms(struct sockopt *sopt) 1334 { 1335 struct dn_extra_parms *ep; 1336 struct dn_fsk *fs; 1337 size_t sopt_valsize; 1338 int l, err = 0; 1339 1340 sopt_valsize = sopt->sopt_valsize; 1341 l = sizeof(*ep); 1342 if (sopt->sopt_valsize < l) { 1343 D("bad len sopt->sopt_valsize %d len %d", 1344 (int) sopt->sopt_valsize , l); 1345 err = EINVAL; 1346 return err; 1347 } 1348 ep = malloc(l, M_DUMMYNET, M_WAITOK); 1349 if(!ep) { 1350 err = ENOMEM ; 1351 return err; 1352 } 1353 do { 1354 err = sooptcopyin(sopt, ep, l, l); 1355 if(err) 1356 break; 1357 sopt->sopt_valsize = sopt_valsize; 1358 if (ep->oid.len < l) { 1359 err = EINVAL; 1360 break; 1361 } 1362 1363 fs = dn_ht_find(dn_cfg.fshash, ep->nr, 0, NULL); 1364 if (!fs) { 1365 D("fs %d not found", ep->nr); 1366 err = EINVAL; 1367 break; 1368 } 1369 1370 if (fs->aqmfp && fs->aqmfp->getconfig) { 1371 if(fs->aqmfp->getconfig(fs, ep)) { 1372 D("Error while trying to get AQM params"); 1373 err = EINVAL; 1374 break; 1375 } 1376 ep->oid.len = l; 1377 err = sooptcopyout(sopt, ep, l); 1378 } 1379 }while(0); 1380 1381 free(ep, M_DUMMYNET); 1382 return err; 1383 } 1384 1385 /* Retrieve AQM configurations to ipfw userland 1386 */ 1387 static int 1388 get_sched_parms(struct sockopt *sopt) 1389 { 1390 struct dn_extra_parms *ep; 1391 struct dn_schk *schk; 1392 size_t sopt_valsize; 1393 int l, err = 0; 1394 1395 sopt_valsize = sopt->sopt_valsize; 1396 l = sizeof(*ep); 1397 if (sopt->sopt_valsize < l) { 1398 D("bad len sopt->sopt_valsize %d len %d", 1399 (int) sopt->sopt_valsize , l); 1400 err = EINVAL; 1401 return err; 1402 } 1403 ep = malloc(l, M_DUMMYNET, M_WAITOK); 1404 if(!ep) { 1405 err = ENOMEM ; 1406 return err; 1407 } 1408 do { 1409 err = sooptcopyin(sopt, ep, l, l); 1410 if(err) 1411 break; 1412 sopt->sopt_valsize = sopt_valsize; 1413 if (ep->oid.len < l) { 1414 err = EINVAL; 1415 break; 1416 } 1417 1418 schk = locate_scheduler(ep->nr); 1419 if (!schk) { 1420 D("sched %d not found", ep->nr); 1421 err = EINVAL; 1422 break; 1423 } 1424 1425 if (schk->fp && schk->fp->getconfig) { 1426 if(schk->fp->getconfig(schk, ep)) { 1427 D("Error while trying to get sched params"); 1428 err = EINVAL; 1429 break; 1430 } 1431 ep->oid.len = l; 1432 err = sooptcopyout(sopt, ep, l); 1433 } 1434 }while(0); 1435 free(ep, M_DUMMYNET); 1436 1437 return err; 1438 } 1439 1440 /* Configure AQM for flowset 'fs'. 1441 * extra parameters are passed from userland. 1442 */ 1443 static int 1444 config_aqm(struct dn_fsk *fs, struct dn_extra_parms *ep, int busy) 1445 { 1446 int err = 0; 1447 1448 do { 1449 /* no configurations */ 1450 if (!ep) { 1451 err = 0; 1452 break; 1453 } 1454 1455 /* no AQM for this flowset*/ 1456 if (!strcmp(ep->name,"")) { 1457 err = 0; 1458 break; 1459 } 1460 if (ep->oid.len < sizeof(*ep)) { 1461 D("short aqm len %d", ep->oid.len); 1462 err = EINVAL; 1463 break; 1464 } 1465 1466 if (busy) { 1467 D("Unable to configure flowset, flowset busy!"); 1468 err = EINVAL; 1469 break; 1470 } 1471 1472 /* deconfigure old aqm if exist */ 1473 if (fs->aqmcfg && fs->aqmfp && fs->aqmfp->deconfig) { 1474 aqm_cleanup_deconfig_fs(fs); 1475 } 1476 1477 if (!(fs->aqmfp = find_aqm_type(0, ep->name))) { 1478 D("AQM functions not found for type %s!", ep->name); 1479 fs->fs.flags &= ~DN_IS_AQM; 1480 err = EINVAL; 1481 break; 1482 } else 1483 fs->fs.flags |= DN_IS_AQM; 1484 1485 if (ep->oid.subtype != DN_AQM_PARAMS) { 1486 D("Wrong subtype"); 1487 err = EINVAL; 1488 break; 1489 } 1490 1491 if (fs->aqmfp->config) { 1492 err = fs->aqmfp->config(fs, ep, ep->oid.len); 1493 if (err) { 1494 D("Unable to configure AQM for FS %d", fs->fs.fs_nr ); 1495 fs->fs.flags &= ~DN_IS_AQM; 1496 fs->aqmfp = NULL; 1497 break; 1498 } 1499 } 1500 } while(0); 1501 1502 return err; 1503 } 1504 #endif 1505 1506 /* 1507 * Configuration -- to preserve backward compatibility we use 1508 * the following scheme (N is 65536) 1509 * NUMBER SCHED LINK FLOWSET 1510 * 1 .. N-1 (1)WFQ (2)WFQ (3)queue 1511 * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1 1512 * 2N+1 .. 3N-1 -- -- (7)FIFO for sched N+1..2N-1 1513 * 1514 * "pipe i config" configures #1, #2 and #3 1515 * "sched i config" configures #1 and possibly #6 1516 * "queue i config" configures #3 1517 * #1 is configured with 'pipe i config' or 'sched i config' 1518 * #2 is configured with 'pipe i config', and created if not 1519 * existing with 'sched i config' 1520 * #3 is configured with 'queue i config' 1521 * #4 is automatically configured after #1, can only be FIFO 1522 * #5 is automatically configured after #2 1523 * #6 is automatically created when #1 is !MULTIQUEUE, 1524 * and can be updated. 1525 * #7 is automatically configured after #2 1526 */ 1527 1528 /* 1529 * configure a link (and its FIFO instance) 1530 */ 1531 static int 1532 config_link(struct dn_link *p, struct dn_id *arg) 1533 { 1534 int i; 1535 1536 if (p->oid.len != sizeof(*p)) { 1537 D("invalid pipe len %d", p->oid.len); 1538 return EINVAL; 1539 } 1540 i = p->link_nr; 1541 if (i <= 0 || i >= DN_MAX_ID) 1542 return EINVAL; 1543 /* 1544 * The config program passes parameters as follows: 1545 * bw = bits/second (0 means no limits), 1546 * delay = ms, must be translated into ticks. 1547 * qsize = slots/bytes 1548 * burst ??? 1549 */ 1550 p->delay = (p->delay * hz) / 1000; 1551 /* Scale burst size: bytes -> bits * hz */ 1552 p->burst *= 8 * hz; 1553 1554 DN_BH_WLOCK(); 1555 /* do it twice, base link and FIFO link */ 1556 for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { 1557 struct dn_schk *s = locate_scheduler(i); 1558 if (s == NULL) { 1559 DN_BH_WUNLOCK(); 1560 D("sched %d not found", i); 1561 return EINVAL; 1562 } 1563 /* remove profile if exists */ 1564 if (s->profile) { 1565 free(s->profile, M_DUMMYNET); 1566 s->profile = NULL; 1567 } 1568 /* copy all parameters */ 1569 s->link.oid = p->oid; 1570 s->link.link_nr = i; 1571 s->link.delay = p->delay; 1572 if (s->link.bandwidth != p->bandwidth) { 1573 /* XXX bandwidth changes, need to update red params */ 1574 s->link.bandwidth = p->bandwidth; 1575 update_red(s); 1576 } 1577 s->link.burst = p->burst; 1578 schk_reset_credit(s); 1579 } 1580 dn_cfg.id++; 1581 DN_BH_WUNLOCK(); 1582 return 0; 1583 } 1584 1585 /* 1586 * configure a flowset. Can be called from inside with locked=1, 1587 */ 1588 static struct dn_fsk * 1589 config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked) 1590 { 1591 int i; 1592 struct dn_fsk *fs; 1593 #ifdef NEW_AQM 1594 struct dn_extra_parms *ep; 1595 #endif 1596 1597 if (nfs->oid.len != sizeof(*nfs)) { 1598 D("invalid flowset len %d", nfs->oid.len); 1599 return NULL; 1600 } 1601 i = nfs->fs_nr; 1602 if (i <= 0 || i >= 3*DN_MAX_ID) 1603 return NULL; 1604 #ifdef NEW_AQM 1605 ep = NULL; 1606 if (arg != NULL) { 1607 ep = malloc(sizeof(*ep), M_TEMP, locked ? M_NOWAIT : M_WAITOK); 1608 if (ep == NULL) 1609 return (NULL); 1610 memcpy(ep, arg, sizeof(*ep)); 1611 } 1612 #endif 1613 ND("flowset %d", i); 1614 /* XXX other sanity checks */ 1615 if (nfs->flags & DN_QSIZE_BYTES) { 1616 ipdn_bound_var(&nfs->qsize, 16384, 1617 1500, dn_cfg.byte_limit, NULL); // "queue byte size"); 1618 } else { 1619 ipdn_bound_var(&nfs->qsize, 50, 1620 1, dn_cfg.slot_limit, NULL); // "queue slot size"); 1621 } 1622 if (nfs->flags & DN_HAVE_MASK) { 1623 /* make sure we have some buckets */ 1624 ipdn_bound_var((int *)&nfs->buckets, dn_cfg.hash_size, 1625 1, dn_cfg.max_hash_size, "flowset buckets"); 1626 } else { 1627 nfs->buckets = 1; /* we only need 1 */ 1628 } 1629 if (!locked) 1630 DN_BH_WLOCK(); 1631 do { /* exit with break when done */ 1632 struct dn_schk *s; 1633 int flags = nfs->sched_nr ? DNHT_INSERT : 0; 1634 int j; 1635 int oldc = dn_cfg.fsk_count; 1636 fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL); 1637 if (fs == NULL) { 1638 D("missing sched for flowset %d", i); 1639 break; 1640 } 1641 /* grab some defaults from the existing one */ 1642 if (nfs->sched_nr == 0) /* reuse */ 1643 nfs->sched_nr = fs->fs.sched_nr; 1644 for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) { 1645 if (nfs->par[j] == -1) /* reuse */ 1646 nfs->par[j] = fs->fs.par[j]; 1647 } 1648 if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) { 1649 ND("flowset %d unchanged", i); 1650 #ifdef NEW_AQM 1651 if (ep != NULL) { 1652 /* 1653 * Reconfigure AQM as the parameters can be changed. 1654 * We consider the flowset as busy if it has scheduler 1655 * instance(s). 1656 */ 1657 s = locate_scheduler(nfs->sched_nr); 1658 config_aqm(fs, ep, s != NULL && s->siht != NULL); 1659 } 1660 #endif 1661 break; /* no change, nothing to do */ 1662 } 1663 if (oldc != dn_cfg.fsk_count) /* new item */ 1664 dn_cfg.id++; 1665 s = locate_scheduler(nfs->sched_nr); 1666 /* detach from old scheduler if needed, preserving 1667 * queues if we need to reattach. Then update the 1668 * configuration, and possibly attach to the new sched. 1669 */ 1670 DX(2, "fs %d changed sched %d@%p to %d@%p", 1671 fs->fs.fs_nr, 1672 fs->fs.sched_nr, fs->sched, nfs->sched_nr, s); 1673 if (fs->sched) { 1674 int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY); 1675 flags |= DN_DESTROY; /* XXX temporary */ 1676 fsk_detach(fs, flags); 1677 } 1678 fs->fs = *nfs; /* copy configuration */ 1679 #ifdef NEW_AQM 1680 fs->aqmfp = NULL; 1681 if (ep != NULL) 1682 config_aqm(fs, ep, s != NULL && 1683 s->siht != NULL); 1684 #endif 1685 if (s != NULL) 1686 fsk_attach(fs, s); 1687 } while (0); 1688 if (!locked) 1689 DN_BH_WUNLOCK(); 1690 #ifdef NEW_AQM 1691 if (ep != NULL) 1692 free(ep, M_TEMP); 1693 #endif 1694 return fs; 1695 } 1696 1697 /* 1698 * config/reconfig a scheduler and its FIFO variant. 1699 * For !MULTIQUEUE schedulers, also set up the flowset. 1700 * 1701 * On reconfigurations (detected because s->fp is set), 1702 * detach existing flowsets preserving traffic, preserve link, 1703 * and delete the old scheduler creating a new one. 1704 */ 1705 static int 1706 config_sched(struct dn_sch *_nsch, struct dn_id *arg) 1707 { 1708 struct dn_schk *s; 1709 struct schk_new_arg a; /* argument for schk_new */ 1710 int i; 1711 struct dn_link p; /* copy of oldlink */ 1712 struct dn_profile *pf = NULL; /* copy of old link profile */ 1713 /* Used to preserv mask parameter */ 1714 struct ipfw_flow_id new_mask; 1715 int new_buckets = 0; 1716 int new_flags = 0; 1717 int pipe_cmd; 1718 int err = ENOMEM; 1719 1720 a.sch = _nsch; 1721 if (a.sch->oid.len != sizeof(*a.sch)) { 1722 D("bad sched len %d", a.sch->oid.len); 1723 return EINVAL; 1724 } 1725 i = a.sch->sched_nr; 1726 if (i <= 0 || i >= DN_MAX_ID) 1727 return EINVAL; 1728 /* make sure we have some buckets */ 1729 if (a.sch->flags & DN_HAVE_MASK) 1730 ipdn_bound_var((int *)&a.sch->buckets, dn_cfg.hash_size, 1731 1, dn_cfg.max_hash_size, "sched buckets"); 1732 /* XXX other sanity checks */ 1733 bzero(&p, sizeof(p)); 1734 1735 pipe_cmd = a.sch->flags & DN_PIPE_CMD; 1736 a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set? 1737 if (pipe_cmd) { 1738 /* Copy mask parameter */ 1739 new_mask = a.sch->sched_mask; 1740 new_buckets = a.sch->buckets; 1741 new_flags = a.sch->flags; 1742 } 1743 DN_BH_WLOCK(); 1744 again: /* run twice, for wfq and fifo */ 1745 /* 1746 * lookup the type. If not supplied, use the previous one 1747 * or default to WF2Q+. Otherwise, return an error. 1748 */ 1749 dn_cfg.id++; 1750 a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name); 1751 if (a.fp != NULL) { 1752 /* found. Lookup or create entry */ 1753 s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a); 1754 } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) { 1755 /* No type. search existing s* or retry with WF2Q+ */ 1756 s = dn_ht_find(dn_cfg.schedhash, i, 0, &a); 1757 if (s != NULL) { 1758 a.fp = s->fp; 1759 /* Scheduler exists, skip to FIFO scheduler 1760 * if command was pipe config... 1761 */ 1762 if (pipe_cmd) 1763 goto next; 1764 } else { 1765 /* New scheduler, create a wf2q+ with no mask 1766 * if command was pipe config... 1767 */ 1768 if (pipe_cmd) { 1769 /* clear mask parameter */ 1770 bzero(&a.sch->sched_mask, sizeof(new_mask)); 1771 a.sch->buckets = 0; 1772 a.sch->flags &= ~DN_HAVE_MASK; 1773 } 1774 a.sch->oid.subtype = DN_SCHED_WF2QP; 1775 goto again; 1776 } 1777 } else { 1778 D("invalid scheduler type %d %s", 1779 a.sch->oid.subtype, a.sch->name); 1780 err = EINVAL; 1781 goto error; 1782 } 1783 /* normalize name and subtype */ 1784 a.sch->oid.subtype = a.fp->type; 1785 bzero(a.sch->name, sizeof(a.sch->name)); 1786 strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name)); 1787 if (s == NULL) { 1788 D("cannot allocate scheduler %d", i); 1789 goto error; 1790 } 1791 /* restore existing link if any */ 1792 if (p.link_nr) { 1793 s->link = p; 1794 if (!pf || pf->link_nr != p.link_nr) { /* no saved value */ 1795 s->profile = NULL; /* XXX maybe not needed */ 1796 } else { 1797 s->profile = malloc(sizeof(struct dn_profile), 1798 M_DUMMYNET, M_NOWAIT | M_ZERO); 1799 if (s->profile == NULL) { 1800 D("cannot allocate profile"); 1801 goto error; //XXX 1802 } 1803 memcpy(s->profile, pf, sizeof(*pf)); 1804 } 1805 } 1806 p.link_nr = 0; 1807 if (s->fp == NULL) { 1808 DX(2, "sched %d new type %s", i, a.fp->name); 1809 } else if (s->fp != a.fp || 1810 bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) { 1811 /* already existing. */ 1812 DX(2, "sched %d type changed from %s to %s", 1813 i, s->fp->name, a.fp->name); 1814 DX(4, " type/sub %d/%d -> %d/%d", 1815 s->sch.oid.type, s->sch.oid.subtype, 1816 a.sch->oid.type, a.sch->oid.subtype); 1817 if (s->link.link_nr == 0) 1818 D("XXX WARNING link 0 for sched %d", i); 1819 p = s->link; /* preserve link */ 1820 if (s->profile) {/* preserve profile */ 1821 if (!pf) 1822 pf = malloc(sizeof(*pf), 1823 M_DUMMYNET, M_NOWAIT | M_ZERO); 1824 if (pf) /* XXX should issue a warning otherwise */ 1825 memcpy(pf, s->profile, sizeof(*pf)); 1826 } 1827 /* remove from the hash */ 1828 dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL); 1829 /* Detach flowsets, preserve queues. */ 1830 // schk_delete_cb(s, NULL); 1831 // XXX temporarily, kill queues 1832 schk_delete_cb(s, (void *)DN_DESTROY); 1833 goto again; 1834 } else { 1835 DX(4, "sched %d unchanged type %s", i, a.fp->name); 1836 } 1837 /* complete initialization */ 1838 s->sch = *a.sch; 1839 s->fp = a.fp; 1840 s->cfg = arg; 1841 // XXX schk_reset_credit(s); 1842 /* create the internal flowset if needed, 1843 * trying to reuse existing ones if available 1844 */ 1845 if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) { 1846 s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL); 1847 if (!s->fs) { 1848 struct dn_fs fs; 1849 bzero(&fs, sizeof(fs)); 1850 set_oid(&fs.oid, DN_FS, sizeof(fs)); 1851 fs.fs_nr = i + DN_MAX_ID; 1852 fs.sched_nr = i; 1853 s->fs = config_fs(&fs, NULL, 1 /* locked */); 1854 } 1855 if (!s->fs) { 1856 schk_delete_cb(s, (void *)DN_DESTROY); 1857 D("error creating internal fs for %d", i); 1858 goto error; 1859 } 1860 } 1861 /* call init function after the flowset is created */ 1862 if (s->fp->config) 1863 s->fp->config(s); 1864 update_fs(s); 1865 next: 1866 if (i < DN_MAX_ID) { /* now configure the FIFO instance */ 1867 i += DN_MAX_ID; 1868 if (pipe_cmd) { 1869 /* Restore mask parameter for FIFO */ 1870 a.sch->sched_mask = new_mask; 1871 a.sch->buckets = new_buckets; 1872 a.sch->flags = new_flags; 1873 } else { 1874 /* sched config shouldn't modify the FIFO scheduler */ 1875 if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) { 1876 /* FIFO already exist, don't touch it */ 1877 err = 0; /* and this is not an error */ 1878 goto error; 1879 } 1880 } 1881 a.sch->sched_nr = i; 1882 a.sch->oid.subtype = DN_SCHED_FIFO; 1883 bzero(a.sch->name, sizeof(a.sch->name)); 1884 goto again; 1885 } 1886 err = 0; 1887 error: 1888 DN_BH_WUNLOCK(); 1889 if (pf) 1890 free(pf, M_DUMMYNET); 1891 return err; 1892 } 1893 1894 /* 1895 * attach a profile to a link 1896 */ 1897 static int 1898 config_profile(struct dn_profile *pf, struct dn_id *arg) 1899 { 1900 struct dn_schk *s; 1901 int i, olen, err = 0; 1902 1903 if (pf->oid.len < sizeof(*pf)) { 1904 D("short profile len %d", pf->oid.len); 1905 return EINVAL; 1906 } 1907 i = pf->link_nr; 1908 if (i <= 0 || i >= DN_MAX_ID) 1909 return EINVAL; 1910 /* XXX other sanity checks */ 1911 DN_BH_WLOCK(); 1912 for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { 1913 s = locate_scheduler(i); 1914 1915 if (s == NULL) { 1916 err = EINVAL; 1917 break; 1918 } 1919 dn_cfg.id++; 1920 /* 1921 * If we had a profile and the new one does not fit, 1922 * or it is deleted, then we need to free memory. 1923 */ 1924 if (s->profile && (pf->samples_no == 0 || 1925 s->profile->oid.len < pf->oid.len)) { 1926 free(s->profile, M_DUMMYNET); 1927 s->profile = NULL; 1928 } 1929 if (pf->samples_no == 0) 1930 continue; 1931 /* 1932 * new profile, possibly allocate memory 1933 * and copy data. 1934 */ 1935 if (s->profile == NULL) 1936 s->profile = malloc(pf->oid.len, 1937 M_DUMMYNET, M_NOWAIT | M_ZERO); 1938 if (s->profile == NULL) { 1939 D("no memory for profile %d", i); 1940 err = ENOMEM; 1941 break; 1942 } 1943 /* preserve larger length XXX double check */ 1944 olen = s->profile->oid.len; 1945 if (olen < pf->oid.len) 1946 olen = pf->oid.len; 1947 memcpy(s->profile, pf, pf->oid.len); 1948 s->profile->oid.len = olen; 1949 } 1950 DN_BH_WUNLOCK(); 1951 return err; 1952 } 1953 1954 /* 1955 * Delete all objects: 1956 */ 1957 static void 1958 dummynet_flush(void) 1959 { 1960 1961 /* delete all schedulers and related links/queues/flowsets */ 1962 dn_ht_scan(dn_cfg.schedhash, schk_delete_cb, 1963 (void *)(uintptr_t)DN_DELETE_FS); 1964 /* delete all remaining (unlinked) flowsets */ 1965 DX(4, "still %d unlinked fs", dn_cfg.fsk_count); 1966 dn_ht_free(dn_cfg.fshash, DNHT_REMOVE); 1967 fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS); 1968 /* Reinitialize system heap... */ 1969 heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id)); 1970 } 1971 1972 /* 1973 * Main handler for configuration. We are guaranteed to be called 1974 * with an oid which is at least a dn_id. 1975 * - the first object is the command (config, delete, flush, ...) 1976 * - config_link must be issued after the corresponding config_sched 1977 * - parameters (DN_TXT) for an object must precede the object 1978 * processed on a config_sched. 1979 */ 1980 int 1981 do_config(void *p, int l) 1982 { 1983 struct dn_id o; 1984 union { 1985 struct dn_profile profile; 1986 struct dn_fs fs; 1987 struct dn_link link; 1988 struct dn_sch sched; 1989 } *dn; 1990 struct dn_id *arg; 1991 uintptr_t a; 1992 int err, err2, off; 1993 1994 memcpy(&o, p, sizeof(o)); 1995 if (o.id != DN_API_VERSION) { 1996 D("invalid api version got %d need %d", o.id, DN_API_VERSION); 1997 return EINVAL; 1998 } 1999 arg = NULL; 2000 dn = NULL; 2001 for (off = 0; l >= sizeof(o); memcpy(&o, (char *)p + off, sizeof(o))) { 2002 if (o.len < sizeof(o) || l < o.len) { 2003 D("bad len o.len %d len %d", o.len, l); 2004 err = EINVAL; 2005 break; 2006 } 2007 l -= o.len; 2008 err = 0; 2009 switch (o.type) { 2010 default: 2011 D("cmd %d not implemented", o.type); 2012 break; 2013 2014 #ifdef EMULATE_SYSCTL 2015 /* sysctl emulation. 2016 * if we recognize the command, jump to the correct 2017 * handler and return 2018 */ 2019 case DN_SYSCTL_SET: 2020 err = kesysctl_emu_set(p, l); 2021 return err; 2022 #endif 2023 2024 case DN_CMD_CONFIG: /* simply a header */ 2025 break; 2026 2027 case DN_CMD_DELETE: 2028 /* the argument is in the first uintptr_t after o */ 2029 if (o.len < sizeof(o) + sizeof(a)) { 2030 err = EINVAL; 2031 break; 2032 } 2033 memcpy(&a, (char *)p + off + sizeof(o), sizeof(a)); 2034 switch (o.subtype) { 2035 case DN_LINK: 2036 /* delete base and derived schedulers */ 2037 DN_BH_WLOCK(); 2038 err = delete_schk(a); 2039 err2 = delete_schk(a + DN_MAX_ID); 2040 DN_BH_WUNLOCK(); 2041 if (!err) 2042 err = err2; 2043 break; 2044 2045 default: 2046 D("invalid delete type %d", o.subtype); 2047 err = EINVAL; 2048 break; 2049 2050 case DN_FS: 2051 err = (a < 1 || a >= DN_MAX_ID) ? 2052 EINVAL : delete_fs(a, 0) ; 2053 break; 2054 } 2055 break; 2056 2057 case DN_CMD_FLUSH: 2058 DN_BH_WLOCK(); 2059 dummynet_flush(); 2060 DN_BH_WUNLOCK(); 2061 break; 2062 case DN_TEXT: /* store argument of next block */ 2063 if (arg != NULL) 2064 free(arg, M_TEMP); 2065 arg = malloc(o.len, M_TEMP, M_WAITOK); 2066 memcpy(arg, (char *)p + off, o.len); 2067 break; 2068 case DN_LINK: 2069 if (dn == NULL) 2070 dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK); 2071 memcpy(&dn->link, (char *)p + off, sizeof(dn->link)); 2072 err = config_link(&dn->link, arg); 2073 break; 2074 case DN_PROFILE: 2075 if (dn == NULL) 2076 dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK); 2077 memcpy(&dn->profile, (char *)p + off, 2078 sizeof(dn->profile)); 2079 err = config_profile(&dn->profile, arg); 2080 break; 2081 case DN_SCH: 2082 if (dn == NULL) 2083 dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK); 2084 memcpy(&dn->sched, (char *)p + off, 2085 sizeof(dn->sched)); 2086 err = config_sched(&dn->sched, arg); 2087 break; 2088 case DN_FS: 2089 if (dn == NULL) 2090 dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK); 2091 memcpy(&dn->fs, (char *)p + off, sizeof(dn->fs)); 2092 err = (NULL == config_fs(&dn->fs, arg, 0)); 2093 break; 2094 } 2095 if (err != 0) 2096 break; 2097 off += o.len; 2098 } 2099 if (arg != NULL) 2100 free(arg, M_TEMP); 2101 if (dn != NULL) 2102 free(dn, M_TEMP); 2103 return err; 2104 } 2105 2106 static int 2107 compute_space(struct dn_id *cmd, struct copy_args *a) 2108 { 2109 int x = 0, need = 0; 2110 int profile_size = sizeof(struct dn_profile) - 2111 ED_MAX_SAMPLES_NO*sizeof(int); 2112 2113 /* NOTE about compute space: 2114 * NP = dn_cfg.schk_count 2115 * NSI = dn_cfg.si_count 2116 * NF = dn_cfg.fsk_count 2117 * NQ = dn_cfg.queue_count 2118 * - ipfw pipe show 2119 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 2120 * link, scheduler template, flowset 2121 * integrated in scheduler and header 2122 * for flowset list 2123 * (NSI)*(dn_flow) all scheduler instance (includes 2124 * the queue instance) 2125 * - ipfw sched show 2126 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 2127 * link, scheduler template, flowset 2128 * integrated in scheduler and header 2129 * for flowset list 2130 * (NSI * dn_flow) all scheduler instances 2131 * (NF * sizeof(uint_32)) space for flowset list linked to scheduler 2132 * (NQ * dn_queue) all queue [XXXfor now not listed] 2133 * - ipfw queue show 2134 * (NF * dn_fs) all flowset 2135 * (NQ * dn_queue) all queues 2136 */ 2137 switch (cmd->subtype) { 2138 default: 2139 return -1; 2140 /* XXX where do LINK and SCH differ ? */ 2141 /* 'ipfw sched show' could list all queues associated to 2142 * a scheduler. This feature for now is disabled 2143 */ 2144 case DN_LINK: /* pipe show */ 2145 x = DN_C_LINK | DN_C_SCH | DN_C_FLOW; 2146 need += dn_cfg.schk_count * 2147 (sizeof(struct dn_fs) + profile_size) / 2; 2148 need += dn_cfg.fsk_count * sizeof(uint32_t); 2149 break; 2150 case DN_SCH: /* sched show */ 2151 need += dn_cfg.schk_count * 2152 (sizeof(struct dn_fs) + profile_size) / 2; 2153 need += dn_cfg.fsk_count * sizeof(uint32_t); 2154 x = DN_C_SCH | DN_C_LINK | DN_C_FLOW; 2155 break; 2156 case DN_FS: /* queue show */ 2157 x = DN_C_FS | DN_C_QUEUE; 2158 break; 2159 case DN_GET_COMPAT: /* compatibility mode */ 2160 need = dn_compat_calc_size(); 2161 break; 2162 } 2163 a->flags = x; 2164 if (x & DN_C_SCH) { 2165 need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2; 2166 /* NOT also, each fs might be attached to a sched */ 2167 need += dn_cfg.schk_count * sizeof(struct dn_id) / 2; 2168 } 2169 if (x & DN_C_FS) 2170 need += dn_cfg.fsk_count * sizeof(struct dn_fs); 2171 if (x & DN_C_LINK) { 2172 need += dn_cfg.schk_count * sizeof(struct dn_link) / 2; 2173 } 2174 /* 2175 * When exporting a queue to userland, only pass up the 2176 * struct dn_flow, which is the only visible part. 2177 */ 2178 2179 if (x & DN_C_QUEUE) 2180 need += dn_cfg.queue_count * sizeof(struct dn_flow); 2181 if (x & DN_C_FLOW) 2182 need += dn_cfg.si_count * (sizeof(struct dn_flow)); 2183 return need; 2184 } 2185 2186 /* 2187 * If compat != NULL dummynet_get is called in compatibility mode. 2188 * *compat will be the pointer to the buffer to pass to ipfw 2189 */ 2190 int 2191 dummynet_get(struct sockopt *sopt, void **compat) 2192 { 2193 int have, i, need, error; 2194 char *start = NULL, *buf; 2195 size_t sopt_valsize; 2196 struct dn_id *cmd; 2197 struct copy_args a; 2198 struct copy_range r; 2199 int l = sizeof(struct dn_id); 2200 2201 bzero(&a, sizeof(a)); 2202 bzero(&r, sizeof(r)); 2203 2204 /* save and restore original sopt_valsize around copyin */ 2205 sopt_valsize = sopt->sopt_valsize; 2206 2207 cmd = &r.o; 2208 2209 if (!compat) { 2210 /* copy at least an oid, and possibly a full object */ 2211 error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); 2212 sopt->sopt_valsize = sopt_valsize; 2213 if (error) 2214 goto done; 2215 l = cmd->len; 2216 #ifdef EMULATE_SYSCTL 2217 /* sysctl emulation. */ 2218 if (cmd->type == DN_SYSCTL_GET) 2219 return kesysctl_emu_get(sopt); 2220 #endif 2221 if (l > sizeof(r)) { 2222 /* request larger than default, allocate buffer */ 2223 cmd = malloc(l, M_DUMMYNET, M_WAITOK); 2224 error = sooptcopyin(sopt, cmd, l, l); 2225 sopt->sopt_valsize = sopt_valsize; 2226 if (error) 2227 goto done; 2228 } 2229 } else { /* compatibility */ 2230 error = 0; 2231 cmd->type = DN_CMD_GET; 2232 cmd->len = sizeof(struct dn_id); 2233 cmd->subtype = DN_GET_COMPAT; 2234 // cmd->id = sopt_valsize; 2235 D("compatibility mode"); 2236 } 2237 2238 #ifdef NEW_AQM 2239 /* get AQM params */ 2240 if(cmd->subtype == DN_AQM_PARAMS) { 2241 error = get_aqm_parms(sopt); 2242 goto done; 2243 /* get Scheduler params */ 2244 } else if (cmd->subtype == DN_SCH_PARAMS) { 2245 error = get_sched_parms(sopt); 2246 goto done; 2247 } 2248 #endif 2249 2250 a.extra = (struct copy_range *)cmd; 2251 if (cmd->len == sizeof(*cmd)) { /* no range, create a default */ 2252 uint32_t *rp = (uint32_t *)(cmd + 1); 2253 cmd->len += 2* sizeof(uint32_t); 2254 rp[0] = 1; 2255 rp[1] = DN_MAX_ID - 1; 2256 if (cmd->subtype == DN_LINK) { 2257 rp[0] += DN_MAX_ID; 2258 rp[1] += DN_MAX_ID; 2259 } 2260 } 2261 /* Count space (under lock) and allocate (outside lock). 2262 * Exit with lock held if we manage to get enough buffer. 2263 * Try a few times then give up. 2264 */ 2265 for (have = 0, i = 0; i < 10; i++) { 2266 DN_BH_WLOCK(); 2267 need = compute_space(cmd, &a); 2268 2269 /* if there is a range, ignore value from compute_space() */ 2270 if (l > sizeof(*cmd)) 2271 need = sopt_valsize - sizeof(*cmd); 2272 2273 if (need < 0) { 2274 DN_BH_WUNLOCK(); 2275 error = EINVAL; 2276 goto done; 2277 } 2278 need += sizeof(*cmd); 2279 cmd->id = need; 2280 if (have >= need) 2281 break; 2282 2283 DN_BH_WUNLOCK(); 2284 if (start) 2285 free(start, M_DUMMYNET); 2286 start = NULL; 2287 if (need > sopt_valsize) 2288 break; 2289 2290 have = need; 2291 start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO); 2292 } 2293 2294 if (start == NULL) { 2295 if (compat) { 2296 *compat = NULL; 2297 error = 1; // XXX 2298 } else { 2299 error = sooptcopyout(sopt, cmd, sizeof(*cmd)); 2300 } 2301 goto done; 2302 } 2303 ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, " 2304 "%d:%d si %d, %d:%d queues %d", 2305 dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH, 2306 dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK, 2307 dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS, 2308 dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I, 2309 dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE); 2310 sopt->sopt_valsize = sopt_valsize; 2311 a.type = cmd->subtype; 2312 2313 if (compat == NULL) { 2314 memcpy(start, cmd, sizeof(*cmd)); 2315 ((struct dn_id*)(start))->len = sizeof(struct dn_id); 2316 buf = start + sizeof(*cmd); 2317 } else 2318 buf = start; 2319 a.start = &buf; 2320 a.end = start + have; 2321 /* start copying other objects */ 2322 if (compat) { 2323 a.type = DN_COMPAT_PIPE; 2324 dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a); 2325 a.type = DN_COMPAT_QUEUE; 2326 dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a); 2327 } else if (a.type == DN_FS) { 2328 dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a); 2329 } else { 2330 dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a); 2331 } 2332 DN_BH_WUNLOCK(); 2333 2334 if (compat) { 2335 *compat = start; 2336 sopt->sopt_valsize = buf - start; 2337 /* free() is done by ip_dummynet_compat() */ 2338 start = NULL; //XXX hack 2339 } else { 2340 error = sooptcopyout(sopt, start, buf - start); 2341 } 2342 done: 2343 if (cmd && cmd != &r.o) 2344 free(cmd, M_DUMMYNET); 2345 if (start) 2346 free(start, M_DUMMYNET); 2347 return error; 2348 } 2349 2350 /* Callback called on scheduler instance to delete it if idle */ 2351 static int 2352 drain_scheduler_cb(void *_si, void *arg) 2353 { 2354 struct dn_sch_inst *si = _si; 2355 2356 if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL) 2357 return 0; 2358 2359 if (si->sched->fp->flags & DN_MULTIQUEUE) { 2360 if (si->q_count == 0) 2361 return si_destroy(si, NULL); 2362 else 2363 return 0; 2364 } else { /* !DN_MULTIQUEUE */ 2365 if ((si+1)->ni.length == 0) 2366 return si_destroy(si, NULL); 2367 else 2368 return 0; 2369 } 2370 return 0; /* unreachable */ 2371 } 2372 2373 /* Callback called on scheduler to check if it has instances */ 2374 static int 2375 drain_scheduler_sch_cb(void *_s, void *arg) 2376 { 2377 struct dn_schk *s = _s; 2378 2379 if (s->sch.flags & DN_HAVE_MASK) { 2380 dn_ht_scan_bucket(s->siht, &s->drain_bucket, 2381 drain_scheduler_cb, NULL); 2382 s->drain_bucket++; 2383 } else { 2384 if (s->siht) { 2385 if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL) 2386 s->siht = NULL; 2387 } 2388 } 2389 return 0; 2390 } 2391 2392 /* Called every tick, try to delete a 'bucket' of scheduler */ 2393 void 2394 dn_drain_scheduler(void) 2395 { 2396 dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch, 2397 drain_scheduler_sch_cb, NULL); 2398 dn_cfg.drain_sch++; 2399 } 2400 2401 /* Callback called on queue to delete if it is idle */ 2402 static int 2403 drain_queue_cb(void *_q, void *arg) 2404 { 2405 struct dn_queue *q = _q; 2406 2407 if (q->ni.length == 0) { 2408 dn_delete_queue(q, DN_DESTROY); 2409 return DNHT_SCAN_DEL; /* queue is deleted */ 2410 } 2411 2412 return 0; /* queue isn't deleted */ 2413 } 2414 2415 /* Callback called on flowset used to check if it has queues */ 2416 static int 2417 drain_queue_fs_cb(void *_fs, void *arg) 2418 { 2419 struct dn_fsk *fs = _fs; 2420 2421 if (fs->fs.flags & DN_QHT_HASH) { 2422 /* Flowset has a hash table for queues */ 2423 dn_ht_scan_bucket(fs->qht, &fs->drain_bucket, 2424 drain_queue_cb, NULL); 2425 fs->drain_bucket++; 2426 } else { 2427 /* No hash table for this flowset, null the pointer 2428 * if the queue is deleted 2429 */ 2430 if (fs->qht) { 2431 if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL) 2432 fs->qht = NULL; 2433 } 2434 } 2435 return 0; 2436 } 2437 2438 /* Called every tick, try to delete a 'bucket' of queue */ 2439 void 2440 dn_drain_queue(void) 2441 { 2442 /* scan a bucket of flowset */ 2443 dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs, 2444 drain_queue_fs_cb, NULL); 2445 dn_cfg.drain_fs++; 2446 } 2447 2448 /* 2449 * Handler for the various dummynet socket options 2450 */ 2451 static int 2452 ip_dn_ctl(struct sockopt *sopt) 2453 { 2454 void *p = NULL; 2455 int error, l; 2456 2457 error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET); 2458 if (error) 2459 return (error); 2460 2461 /* Disallow sets in really-really secure mode. */ 2462 if (sopt->sopt_dir == SOPT_SET) { 2463 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); 2464 if (error) 2465 return (error); 2466 } 2467 2468 switch (sopt->sopt_name) { 2469 default : 2470 D("dummynet: unknown option %d", sopt->sopt_name); 2471 error = EINVAL; 2472 break; 2473 2474 case IP_DUMMYNET_FLUSH: 2475 case IP_DUMMYNET_CONFIGURE: 2476 case IP_DUMMYNET_DEL: /* remove a pipe or queue */ 2477 case IP_DUMMYNET_GET: 2478 D("dummynet: compat option %d", sopt->sopt_name); 2479 error = ip_dummynet_compat(sopt); 2480 break; 2481 2482 case IP_DUMMYNET3 : 2483 if (sopt->sopt_dir == SOPT_GET) { 2484 error = dummynet_get(sopt, NULL); 2485 break; 2486 } 2487 l = sopt->sopt_valsize; 2488 if (l < sizeof(struct dn_id) || l > 12000) { 2489 D("argument len %d invalid", l); 2490 break; 2491 } 2492 p = malloc(l, M_TEMP, M_WAITOK); // XXX can it fail ? 2493 error = sooptcopyin(sopt, p, l, l); 2494 if (error) 2495 break ; 2496 error = do_config(p, l); 2497 break; 2498 } 2499 2500 if (p != NULL) 2501 free(p, M_TEMP); 2502 2503 return error ; 2504 } 2505 2506 2507 static void 2508 ip_dn_init(void) 2509 { 2510 if (dn_cfg.init_done) 2511 return; 2512 printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet); 2513 dn_cfg.init_done = 1; 2514 /* Set defaults here. MSVC does not accept initializers, 2515 * and this is also useful for vimages 2516 */ 2517 /* queue limits */ 2518 dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */ 2519 dn_cfg.byte_limit = 1024 * 1024; 2520 dn_cfg.expire = 1; 2521 2522 /* RED parameters */ 2523 dn_cfg.red_lookup_depth = 256; /* default lookup table depth */ 2524 dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */ 2525 dn_cfg.red_max_pkt_size = 1500; /* default max packet size */ 2526 2527 /* hash tables */ 2528 dn_cfg.max_hash_size = 65536; /* max in the hash tables */ 2529 dn_cfg.hash_size = 64; /* default hash size */ 2530 2531 /* create hash tables for schedulers and flowsets. 2532 * In both we search by key and by pointer. 2533 */ 2534 dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size, 2535 offsetof(struct dn_schk, schk_next), 2536 schk_hash, schk_match, schk_new); 2537 dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size, 2538 offsetof(struct dn_fsk, fsk_next), 2539 fsk_hash, fsk_match, fsk_new); 2540 2541 /* bucket index to drain object */ 2542 dn_cfg.drain_fs = 0; 2543 dn_cfg.drain_sch = 0; 2544 2545 heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id)); 2546 SLIST_INIT(&dn_cfg.fsu); 2547 SLIST_INIT(&dn_cfg.schedlist); 2548 2549 DN_LOCK_INIT(); 2550 2551 TASK_INIT(&dn_task, 0, dummynet_task, curvnet); 2552 dn_tq = taskqueue_create_fast("dummynet", M_WAITOK, 2553 taskqueue_thread_enqueue, &dn_tq); 2554 taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); 2555 2556 callout_init(&dn_timeout, 1); 2557 dn_reschedule(); 2558 2559 /* Initialize curr_time adjustment mechanics. */ 2560 getmicrouptime(&dn_cfg.prev_t); 2561 } 2562 2563 static void 2564 ip_dn_destroy(int last) 2565 { 2566 DN_BH_WLOCK(); 2567 /* ensure no more callouts are started */ 2568 dn_gone = 1; 2569 2570 /* check for last */ 2571 if (last) { 2572 ND("removing last instance\n"); 2573 ip_dn_ctl_ptr = NULL; 2574 ip_dn_io_ptr = NULL; 2575 } 2576 2577 dummynet_flush(); 2578 DN_BH_WUNLOCK(); 2579 2580 callout_drain(&dn_timeout); 2581 taskqueue_drain(dn_tq, &dn_task); 2582 taskqueue_free(dn_tq); 2583 2584 dn_ht_free(dn_cfg.schedhash, 0); 2585 dn_ht_free(dn_cfg.fshash, 0); 2586 heap_free(&dn_cfg.evheap); 2587 2588 DN_LOCK_DESTROY(); 2589 } 2590 2591 static int 2592 dummynet_modevent(module_t mod, int type, void *data) 2593 { 2594 2595 if (type == MOD_LOAD) { 2596 if (ip_dn_io_ptr) { 2597 printf("DUMMYNET already loaded\n"); 2598 return EEXIST ; 2599 } 2600 ip_dn_init(); 2601 ip_dn_ctl_ptr = ip_dn_ctl; 2602 ip_dn_io_ptr = dummynet_io; 2603 return 0; 2604 } else if (type == MOD_UNLOAD) { 2605 ip_dn_destroy(1 /* last */); 2606 return 0; 2607 } else 2608 return EOPNOTSUPP; 2609 } 2610 2611 /* modevent helpers for the modules */ 2612 static int 2613 load_dn_sched(struct dn_alg *d) 2614 { 2615 struct dn_alg *s; 2616 2617 if (d == NULL) 2618 return 1; /* error */ 2619 ip_dn_init(); /* just in case, we need the lock */ 2620 2621 /* Check that mandatory funcs exists */ 2622 if (d->enqueue == NULL || d->dequeue == NULL) { 2623 D("missing enqueue or dequeue for %s", d->name); 2624 return 1; 2625 } 2626 2627 /* Search if scheduler already exists */ 2628 DN_BH_WLOCK(); 2629 SLIST_FOREACH(s, &dn_cfg.schedlist, next) { 2630 if (strcmp(s->name, d->name) == 0) { 2631 D("%s already loaded", d->name); 2632 break; /* scheduler already exists */ 2633 } 2634 } 2635 if (s == NULL) 2636 SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next); 2637 DN_BH_WUNLOCK(); 2638 D("dn_sched %s %sloaded", d->name, s ? "not ":""); 2639 return s ? 1 : 0; 2640 } 2641 2642 static int 2643 unload_dn_sched(struct dn_alg *s) 2644 { 2645 struct dn_alg *tmp, *r; 2646 int err = EINVAL; 2647 2648 ND("called for %s", s->name); 2649 2650 DN_BH_WLOCK(); 2651 SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) { 2652 if (strcmp(s->name, r->name) != 0) 2653 continue; 2654 ND("ref_count = %d", r->ref_count); 2655 err = (r->ref_count != 0) ? EBUSY : 0; 2656 if (err == 0) 2657 SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next); 2658 break; 2659 } 2660 DN_BH_WUNLOCK(); 2661 D("dn_sched %s %sunloaded", s->name, err ? "not ":""); 2662 return err; 2663 } 2664 2665 int 2666 dn_sched_modevent(module_t mod, int cmd, void *arg) 2667 { 2668 struct dn_alg *sch = arg; 2669 2670 if (cmd == MOD_LOAD) 2671 return load_dn_sched(sch); 2672 else if (cmd == MOD_UNLOAD) 2673 return unload_dn_sched(sch); 2674 else 2675 return EINVAL; 2676 } 2677 2678 static moduledata_t dummynet_mod = { 2679 "dummynet", dummynet_modevent, NULL 2680 }; 2681 2682 #define DN_SI_SUB SI_SUB_PROTO_FIREWALL 2683 #define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ 2684 DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); 2685 MODULE_DEPEND(dummynet, ipfw, 3, 3, 3); 2686 MODULE_VERSION(dummynet, 3); 2687 2688 /* 2689 * Starting up. Done in order after dummynet_modevent() has been called. 2690 * VNET_SYSINIT is also called for each existing vnet and each new vnet. 2691 */ 2692 //VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL); 2693 2694 /* 2695 * Shutdown handlers up shop. These are done in REVERSE ORDER, but still 2696 * after dummynet_modevent() has been called. Not called on reboot. 2697 * VNET_SYSUNINIT is also called for each exiting vnet as it exits. 2698 * or when the module is unloaded. 2699 */ 2700 //VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL); 2701 2702 #ifdef NEW_AQM 2703 2704 /* modevent helpers for the AQM modules */ 2705 static int 2706 load_dn_aqm(struct dn_aqm *d) 2707 { 2708 struct dn_aqm *aqm=NULL; 2709 2710 if (d == NULL) 2711 return 1; /* error */ 2712 ip_dn_init(); /* just in case, we need the lock */ 2713 2714 /* Check that mandatory funcs exists */ 2715 if (d->enqueue == NULL || d->dequeue == NULL) { 2716 D("missing enqueue or dequeue for %s", d->name); 2717 return 1; 2718 } 2719 2720 /* Search if AQM already exists */ 2721 DN_BH_WLOCK(); 2722 SLIST_FOREACH(aqm, &dn_cfg.aqmlist, next) { 2723 if (strcmp(aqm->name, d->name) == 0) { 2724 D("%s already loaded", d->name); 2725 break; /* AQM already exists */ 2726 } 2727 } 2728 if (aqm == NULL) 2729 SLIST_INSERT_HEAD(&dn_cfg.aqmlist, d, next); 2730 DN_BH_WUNLOCK(); 2731 D("dn_aqm %s %sloaded", d->name, aqm ? "not ":""); 2732 return aqm ? 1 : 0; 2733 } 2734 2735 2736 /* Callback to clean up AQM status for queues connected to a flowset 2737 * and then deconfigure the flowset. 2738 * This function is called before an AQM module is unloaded 2739 */ 2740 static int 2741 fs_cleanup(void *_fs, void *arg) 2742 { 2743 struct dn_fsk *fs = _fs; 2744 uint32_t type = *(uint32_t *)arg; 2745 2746 if (fs->aqmfp && fs->aqmfp->type == type) 2747 aqm_cleanup_deconfig_fs(fs); 2748 2749 return 0; 2750 } 2751 2752 static int 2753 unload_dn_aqm(struct dn_aqm *aqm) 2754 { 2755 struct dn_aqm *tmp, *r; 2756 int err = EINVAL; 2757 err = 0; 2758 ND("called for %s", aqm->name); 2759 2760 DN_BH_WLOCK(); 2761 2762 /* clean up AQM status and deconfig flowset */ 2763 dn_ht_scan(dn_cfg.fshash, fs_cleanup, &aqm->type); 2764 2765 SLIST_FOREACH_SAFE(r, &dn_cfg.aqmlist, next, tmp) { 2766 if (strcmp(aqm->name, r->name) != 0) 2767 continue; 2768 ND("ref_count = %d", r->ref_count); 2769 err = (r->ref_count != 0 || r->cfg_ref_count != 0) ? EBUSY : 0; 2770 if (err == 0) 2771 SLIST_REMOVE(&dn_cfg.aqmlist, r, dn_aqm, next); 2772 break; 2773 } 2774 DN_BH_WUNLOCK(); 2775 D("%s %sunloaded", aqm->name, err ? "not ":""); 2776 if (err) 2777 D("ref_count=%d, cfg_ref_count=%d", r->ref_count, r->cfg_ref_count); 2778 return err; 2779 } 2780 2781 int 2782 dn_aqm_modevent(module_t mod, int cmd, void *arg) 2783 { 2784 struct dn_aqm *aqm = arg; 2785 2786 if (cmd == MOD_LOAD) 2787 return load_dn_aqm(aqm); 2788 else if (cmd == MOD_UNLOAD) 2789 return unload_dn_aqm(aqm); 2790 else 2791 return EINVAL; 2792 } 2793 #endif 2794 2795 /* end of file */ 2796 2797