1 /*- 2 * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa 3 * Portions Copyright (c) 2000 Akamba Corp. 4 * All rights reserved 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 /* 32 * Configuration and internal object management for dummynet. 33 */ 34 35 #include "opt_inet6.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/malloc.h> 40 #include <sys/mbuf.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/module.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/rwlock.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/time.h> 50 #include <sys/taskqueue.h> 51 #include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ 52 #include <netinet/in.h> 53 #include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ 54 #include <netinet/ip_fw.h> 55 #include <netinet/ip_dummynet.h> 56 57 #include <netpfil/ipfw/ip_fw_private.h> 58 #include <netpfil/ipfw/dn_heap.h> 59 #include <netpfil/ipfw/ip_dn_private.h> 60 #include <netpfil/ipfw/dn_sched.h> 61 62 /* which objects to copy */ 63 #define DN_C_LINK 0x01 64 #define DN_C_SCH 0x02 65 #define DN_C_FLOW 0x04 66 #define DN_C_FS 0x08 67 #define DN_C_QUEUE 0x10 68 69 /* we use this argument in case of a schk_new */ 70 struct schk_new_arg { 71 struct dn_alg *fp; 72 struct dn_sch *sch; 73 }; 74 75 /*---- callout hooks. ----*/ 76 static struct callout dn_timeout; 77 static struct task dn_task; 78 static struct taskqueue *dn_tq = NULL; 79 80 static void 81 dummynet(void *arg) 82 { 83 84 (void)arg; /* UNUSED */ 85 taskqueue_enqueue(dn_tq, &dn_task); 86 } 87 88 void 89 dn_reschedule(void) 90 { 91 callout_reset(&dn_timeout, 1, dummynet, NULL); 92 } 93 /*----- end of callout hooks -----*/ 94 95 /* Return a scheduler descriptor given the type or name. */ 96 static struct dn_alg * 97 find_sched_type(int type, char *name) 98 { 99 struct dn_alg *d; 100 101 SLIST_FOREACH(d, &dn_cfg.schedlist, next) { 102 if (d->type == type || (name && !strcasecmp(d->name, name))) 103 return d; 104 } 105 return NULL; /* not found */ 106 } 107 108 int 109 ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) 110 { 111 int oldv = *v; 112 const char *op = NULL; 113 if (dflt < lo) 114 dflt = lo; 115 if (dflt > hi) 116 dflt = hi; 117 if (oldv < lo) { 118 *v = dflt; 119 op = "Bump"; 120 } else if (oldv > hi) { 121 *v = hi; 122 op = "Clamp"; 123 } else 124 return *v; 125 if (op && msg) 126 printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); 127 return *v; 128 } 129 130 /*---- flow_id mask, hash and compare functions ---*/ 131 /* 132 * The flow_id includes the 5-tuple, the queue/pipe number 133 * which we store in the extra area in host order, 134 * and for ipv6 also the flow_id6. 135 * XXX see if we want the tos byte (can store in 'flags') 136 */ 137 static struct ipfw_flow_id * 138 flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) 139 { 140 int is_v6 = IS_IP6_FLOW_ID(id); 141 142 id->dst_port &= mask->dst_port; 143 id->src_port &= mask->src_port; 144 id->proto &= mask->proto; 145 id->extra &= mask->extra; 146 if (is_v6) { 147 APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); 148 APPLY_MASK(&id->src_ip6, &mask->src_ip6); 149 id->flow_id6 &= mask->flow_id6; 150 } else { 151 id->dst_ip &= mask->dst_ip; 152 id->src_ip &= mask->src_ip; 153 } 154 return id; 155 } 156 157 /* computes an OR of two masks, result in dst and also returned */ 158 static struct ipfw_flow_id * 159 flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst) 160 { 161 int is_v6 = IS_IP6_FLOW_ID(dst); 162 163 dst->dst_port |= src->dst_port; 164 dst->src_port |= src->src_port; 165 dst->proto |= src->proto; 166 dst->extra |= src->extra; 167 if (is_v6) { 168 #define OR_MASK(_d, _s) \ 169 (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \ 170 (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \ 171 (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \ 172 (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3]; 173 OR_MASK(&dst->dst_ip6, &src->dst_ip6); 174 OR_MASK(&dst->src_ip6, &src->src_ip6); 175 #undef OR_MASK 176 dst->flow_id6 |= src->flow_id6; 177 } else { 178 dst->dst_ip |= src->dst_ip; 179 dst->src_ip |= src->src_ip; 180 } 181 return dst; 182 } 183 184 static int 185 nonzero_mask(struct ipfw_flow_id *m) 186 { 187 if (m->dst_port || m->src_port || m->proto || m->extra) 188 return 1; 189 if (IS_IP6_FLOW_ID(m)) { 190 return 191 m->dst_ip6.__u6_addr.__u6_addr32[0] || 192 m->dst_ip6.__u6_addr.__u6_addr32[1] || 193 m->dst_ip6.__u6_addr.__u6_addr32[2] || 194 m->dst_ip6.__u6_addr.__u6_addr32[3] || 195 m->src_ip6.__u6_addr.__u6_addr32[0] || 196 m->src_ip6.__u6_addr.__u6_addr32[1] || 197 m->src_ip6.__u6_addr.__u6_addr32[2] || 198 m->src_ip6.__u6_addr.__u6_addr32[3] || 199 m->flow_id6; 200 } else { 201 return m->dst_ip || m->src_ip; 202 } 203 } 204 205 /* XXX we may want a better hash function */ 206 static uint32_t 207 flow_id_hash(struct ipfw_flow_id *id) 208 { 209 uint32_t i; 210 211 if (IS_IP6_FLOW_ID(id)) { 212 uint32_t *d = (uint32_t *)&id->dst_ip6; 213 uint32_t *s = (uint32_t *)&id->src_ip6; 214 i = (d[0] ) ^ (d[1]) ^ 215 (d[2] ) ^ (d[3]) ^ 216 (d[0] >> 15) ^ (d[1] >> 15) ^ 217 (d[2] >> 15) ^ (d[3] >> 15) ^ 218 (s[0] << 1) ^ (s[1] << 1) ^ 219 (s[2] << 1) ^ (s[3] << 1) ^ 220 (s[0] << 16) ^ (s[1] << 16) ^ 221 (s[2] << 16) ^ (s[3] << 16) ^ 222 (id->dst_port << 1) ^ (id->src_port) ^ 223 (id->extra) ^ 224 (id->proto ) ^ (id->flow_id6); 225 } else { 226 i = (id->dst_ip) ^ (id->dst_ip >> 15) ^ 227 (id->src_ip << 1) ^ (id->src_ip >> 16) ^ 228 (id->extra) ^ 229 (id->dst_port << 1) ^ (id->src_port) ^ (id->proto); 230 } 231 return i; 232 } 233 234 /* Like bcmp, returns 0 if ids match, 1 otherwise. */ 235 static int 236 flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2) 237 { 238 int is_v6 = IS_IP6_FLOW_ID(id1); 239 240 if (!is_v6) { 241 if (IS_IP6_FLOW_ID(id2)) 242 return 1; /* different address families */ 243 244 return (id1->dst_ip == id2->dst_ip && 245 id1->src_ip == id2->src_ip && 246 id1->dst_port == id2->dst_port && 247 id1->src_port == id2->src_port && 248 id1->proto == id2->proto && 249 id1->extra == id2->extra) ? 0 : 1; 250 } 251 /* the ipv6 case */ 252 return ( 253 !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) && 254 !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) && 255 id1->dst_port == id2->dst_port && 256 id1->src_port == id2->src_port && 257 id1->proto == id2->proto && 258 id1->extra == id2->extra && 259 id1->flow_id6 == id2->flow_id6) ? 0 : 1; 260 } 261 /*--------- end of flow-id mask, hash and compare ---------*/ 262 263 /*--- support functions for the qht hashtable ---- 264 * Entries are hashed by flow-id 265 */ 266 static uint32_t 267 q_hash(uintptr_t key, int flags, void *arg) 268 { 269 /* compute the hash slot from the flow id */ 270 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 271 &((struct dn_queue *)key)->ni.fid : 272 (struct ipfw_flow_id *)key; 273 274 return flow_id_hash(id); 275 } 276 277 static int 278 q_match(void *obj, uintptr_t key, int flags, void *arg) 279 { 280 struct dn_queue *o = (struct dn_queue *)obj; 281 struct ipfw_flow_id *id2; 282 283 if (flags & DNHT_KEY_IS_OBJ) { 284 /* compare pointers */ 285 id2 = &((struct dn_queue *)key)->ni.fid; 286 } else { 287 id2 = (struct ipfw_flow_id *)key; 288 } 289 return (0 == flow_id_cmp(&o->ni.fid, id2)); 290 } 291 292 /* 293 * create a new queue instance for the given 'key'. 294 */ 295 static void * 296 q_new(uintptr_t key, int flags, void *arg) 297 { 298 struct dn_queue *q, *template = arg; 299 struct dn_fsk *fs = template->fs; 300 int size = sizeof(*q) + fs->sched->fp->q_datalen; 301 302 q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO); 303 if (q == NULL) { 304 D("no memory for new queue"); 305 return NULL; 306 } 307 308 set_oid(&q->ni.oid, DN_QUEUE, size); 309 if (fs->fs.flags & DN_QHT_HASH) 310 q->ni.fid = *(struct ipfw_flow_id *)key; 311 q->fs = fs; 312 q->_si = template->_si; 313 q->_si->q_count++; 314 315 if (fs->sched->fp->new_queue) 316 fs->sched->fp->new_queue(q); 317 dn_cfg.queue_count++; 318 return q; 319 } 320 321 /* 322 * Notify schedulers that a queue is going away. 323 * If (flags & DN_DESTROY), also free the packets. 324 * The version for callbacks is called q_delete_cb(). 325 */ 326 static void 327 dn_delete_queue(struct dn_queue *q, int flags) 328 { 329 struct dn_fsk *fs = q->fs; 330 331 // D("fs %p si %p\n", fs, q->_si); 332 /* notify the parent scheduler that the queue is going away */ 333 if (fs && fs->sched->fp->free_queue) 334 fs->sched->fp->free_queue(q); 335 q->_si->q_count--; 336 q->_si = NULL; 337 if (flags & DN_DESTROY) { 338 if (q->mq.head) 339 dn_free_pkts(q->mq.head); 340 bzero(q, sizeof(*q)); // safety 341 free(q, M_DUMMYNET); 342 dn_cfg.queue_count--; 343 } 344 } 345 346 static int 347 q_delete_cb(void *q, void *arg) 348 { 349 int flags = (int)(uintptr_t)arg; 350 dn_delete_queue(q, flags); 351 return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0; 352 } 353 354 /* 355 * calls dn_delete_queue/q_delete_cb on all queues, 356 * which notifies the parent scheduler and possibly drains packets. 357 * flags & DN_DESTROY: drains queues and destroy qht; 358 */ 359 static void 360 qht_delete(struct dn_fsk *fs, int flags) 361 { 362 ND("fs %d start flags %d qht %p", 363 fs->fs.fs_nr, flags, fs->qht); 364 if (!fs->qht) 365 return; 366 if (fs->fs.flags & DN_QHT_HASH) { 367 dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags); 368 if (flags & DN_DESTROY) { 369 dn_ht_free(fs->qht, 0); 370 fs->qht = NULL; 371 } 372 } else { 373 dn_delete_queue((struct dn_queue *)(fs->qht), flags); 374 if (flags & DN_DESTROY) 375 fs->qht = NULL; 376 } 377 } 378 379 /* 380 * Find and possibly create the queue for a MULTIQUEUE scheduler. 381 * We never call it for !MULTIQUEUE (the queue is in the sch_inst). 382 */ 383 struct dn_queue * 384 ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si, 385 struct ipfw_flow_id *id) 386 { 387 struct dn_queue template; 388 389 template._si = si; 390 template.fs = fs; 391 392 if (fs->fs.flags & DN_QHT_HASH) { 393 struct ipfw_flow_id masked_id; 394 if (fs->qht == NULL) { 395 fs->qht = dn_ht_init(NULL, fs->fs.buckets, 396 offsetof(struct dn_queue, q_next), 397 q_hash, q_match, q_new); 398 if (fs->qht == NULL) 399 return NULL; 400 } 401 masked_id = *id; 402 flow_id_mask(&fs->fsk_mask, &masked_id); 403 return dn_ht_find(fs->qht, (uintptr_t)&masked_id, 404 DNHT_INSERT, &template); 405 } else { 406 if (fs->qht == NULL) 407 fs->qht = q_new(0, 0, &template); 408 return (struct dn_queue *)fs->qht; 409 } 410 } 411 /*--- end of queue hash table ---*/ 412 413 /*--- support functions for the sch_inst hashtable ---- 414 * 415 * These are hashed by flow-id 416 */ 417 static uint32_t 418 si_hash(uintptr_t key, int flags, void *arg) 419 { 420 /* compute the hash slot from the flow id */ 421 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 422 &((struct dn_sch_inst *)key)->ni.fid : 423 (struct ipfw_flow_id *)key; 424 425 return flow_id_hash(id); 426 } 427 428 static int 429 si_match(void *obj, uintptr_t key, int flags, void *arg) 430 { 431 struct dn_sch_inst *o = obj; 432 struct ipfw_flow_id *id2; 433 434 id2 = (flags & DNHT_KEY_IS_OBJ) ? 435 &((struct dn_sch_inst *)key)->ni.fid : 436 (struct ipfw_flow_id *)key; 437 return flow_id_cmp(&o->ni.fid, id2) == 0; 438 } 439 440 /* 441 * create a new instance for the given 'key' 442 * Allocate memory for instance, delay line and scheduler private data. 443 */ 444 static void * 445 si_new(uintptr_t key, int flags, void *arg) 446 { 447 struct dn_schk *s = arg; 448 struct dn_sch_inst *si; 449 int l = sizeof(*si) + s->fp->si_datalen; 450 451 si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); 452 if (si == NULL) 453 goto error; 454 455 /* Set length only for the part passed up to userland. */ 456 set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow)); 457 set_oid(&(si->dline.oid), DN_DELAY_LINE, 458 sizeof(struct delay_line)); 459 /* mark si and dline as outside the event queue */ 460 si->ni.oid.id = si->dline.oid.id = -1; 461 462 si->sched = s; 463 si->dline.si = si; 464 465 if (s->fp->new_sched && s->fp->new_sched(si)) { 466 D("new_sched error"); 467 goto error; 468 } 469 if (s->sch.flags & DN_HAVE_MASK) 470 si->ni.fid = *(struct ipfw_flow_id *)key; 471 472 dn_cfg.si_count++; 473 return si; 474 475 error: 476 if (si) { 477 bzero(si, sizeof(*si)); // safety 478 free(si, M_DUMMYNET); 479 } 480 return NULL; 481 } 482 483 /* 484 * Callback from siht to delete all scheduler instances. Remove 485 * si and delay line from the system heap, destroy all queues. 486 * We assume that all flowset have been notified and do not 487 * point to us anymore. 488 */ 489 static int 490 si_destroy(void *_si, void *arg) 491 { 492 struct dn_sch_inst *si = _si; 493 struct dn_schk *s = si->sched; 494 struct delay_line *dl = &si->dline; 495 496 if (dl->oid.subtype) /* remove delay line from event heap */ 497 heap_extract(&dn_cfg.evheap, dl); 498 dn_free_pkts(dl->mq.head); /* drain delay line */ 499 if (si->kflags & DN_ACTIVE) /* remove si from event heap */ 500 heap_extract(&dn_cfg.evheap, si); 501 if (s->fp->free_sched) 502 s->fp->free_sched(si); 503 bzero(si, sizeof(*si)); /* safety */ 504 free(si, M_DUMMYNET); 505 dn_cfg.si_count--; 506 return DNHT_SCAN_DEL; 507 } 508 509 /* 510 * Find the scheduler instance for this packet. If we need to apply 511 * a mask, do on a local copy of the flow_id to preserve the original. 512 * Assume siht is always initialized if we have a mask. 513 */ 514 struct dn_sch_inst * 515 ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id) 516 { 517 518 if (s->sch.flags & DN_HAVE_MASK) { 519 struct ipfw_flow_id id_t = *id; 520 flow_id_mask(&s->sch.sched_mask, &id_t); 521 return dn_ht_find(s->siht, (uintptr_t)&id_t, 522 DNHT_INSERT, s); 523 } 524 if (!s->siht) 525 s->siht = si_new(0, 0, s); 526 return (struct dn_sch_inst *)s->siht; 527 } 528 529 /* callback to flush credit for the scheduler instance */ 530 static int 531 si_reset_credit(void *_si, void *arg) 532 { 533 struct dn_sch_inst *si = _si; 534 struct dn_link *p = &si->sched->link; 535 536 si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0); 537 return 0; 538 } 539 540 static void 541 schk_reset_credit(struct dn_schk *s) 542 { 543 if (s->sch.flags & DN_HAVE_MASK) 544 dn_ht_scan(s->siht, si_reset_credit, NULL); 545 else if (s->siht) 546 si_reset_credit(s->siht, NULL); 547 } 548 /*---- end of sch_inst hashtable ---------------------*/ 549 550 /*------------------------------------------------------- 551 * flowset hash (fshash) support. Entries are hashed by fs_nr. 552 * New allocations are put in the fsunlinked list, from which 553 * they are removed when they point to a specific scheduler. 554 */ 555 static uint32_t 556 fsk_hash(uintptr_t key, int flags, void *arg) 557 { 558 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 559 ((struct dn_fsk *)key)->fs.fs_nr; 560 561 return ( (i>>8)^(i>>4)^i ); 562 } 563 564 static int 565 fsk_match(void *obj, uintptr_t key, int flags, void *arg) 566 { 567 struct dn_fsk *fs = obj; 568 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 569 ((struct dn_fsk *)key)->fs.fs_nr; 570 571 return (fs->fs.fs_nr == i); 572 } 573 574 static void * 575 fsk_new(uintptr_t key, int flags, void *arg) 576 { 577 struct dn_fsk *fs; 578 579 fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO); 580 if (fs) { 581 set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs)); 582 dn_cfg.fsk_count++; 583 fs->drain_bucket = 0; 584 SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); 585 } 586 return fs; 587 } 588 589 /* 590 * detach flowset from its current scheduler. Flags as follows: 591 * DN_DETACH removes from the fsk_list 592 * DN_DESTROY deletes individual queues 593 * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked). 594 */ 595 static void 596 fsk_detach(struct dn_fsk *fs, int flags) 597 { 598 if (flags & DN_DELETE_FS) 599 flags |= DN_DESTROY; 600 ND("fs %d from sched %d flags %s %s %s", 601 fs->fs.fs_nr, fs->fs.sched_nr, 602 (flags & DN_DELETE_FS) ? "DEL_FS":"", 603 (flags & DN_DESTROY) ? "DEL":"", 604 (flags & DN_DETACH) ? "DET":""); 605 if (flags & DN_DETACH) { /* detach from the list */ 606 struct dn_fsk_head *h; 607 h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu; 608 SLIST_REMOVE(h, fs, dn_fsk, sch_chain); 609 } 610 /* Free the RED parameters, they will be recomputed on 611 * subsequent attach if needed. 612 */ 613 if (fs->w_q_lookup) 614 free(fs->w_q_lookup, M_DUMMYNET); 615 fs->w_q_lookup = NULL; 616 qht_delete(fs, flags); 617 if (fs->sched && fs->sched->fp->free_fsk) 618 fs->sched->fp->free_fsk(fs); 619 fs->sched = NULL; 620 if (flags & DN_DELETE_FS) { 621 bzero(fs, sizeof(fs)); /* safety */ 622 free(fs, M_DUMMYNET); 623 dn_cfg.fsk_count--; 624 } else { 625 SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); 626 } 627 } 628 629 /* 630 * Detach or destroy all flowsets in a list. 631 * flags specifies what to do: 632 * DN_DESTROY: flush all queues 633 * DN_DELETE_FS: DN_DESTROY + destroy flowset 634 * DN_DELETE_FS implies DN_DESTROY 635 */ 636 static void 637 fsk_detach_list(struct dn_fsk_head *h, int flags) 638 { 639 struct dn_fsk *fs; 640 int n = 0; /* only for stats */ 641 642 ND("head %p flags %x", h, flags); 643 while ((fs = SLIST_FIRST(h))) { 644 SLIST_REMOVE_HEAD(h, sch_chain); 645 n++; 646 fsk_detach(fs, flags); 647 } 648 ND("done %d flowsets", n); 649 } 650 651 /* 652 * called on 'queue X delete' -- removes the flowset from fshash, 653 * deletes all queues for the flowset, and removes the flowset. 654 */ 655 static int 656 delete_fs(int i, int locked) 657 { 658 struct dn_fsk *fs; 659 int err = 0; 660 661 if (!locked) 662 DN_BH_WLOCK(); 663 fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL); 664 ND("fs %d found %p", i, fs); 665 if (fs) { 666 fsk_detach(fs, DN_DETACH | DN_DELETE_FS); 667 err = 0; 668 } else 669 err = EINVAL; 670 if (!locked) 671 DN_BH_WUNLOCK(); 672 return err; 673 } 674 675 /*----- end of flowset hashtable support -------------*/ 676 677 /*------------------------------------------------------------ 678 * Scheduler hash. When searching by index we pass sched_nr, 679 * otherwise we pass struct dn_sch * which is the first field in 680 * struct dn_schk so we can cast between the two. We use this trick 681 * because in the create phase (but it should be fixed). 682 */ 683 static uint32_t 684 schk_hash(uintptr_t key, int flags, void *_arg) 685 { 686 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 687 ((struct dn_schk *)key)->sch.sched_nr; 688 return ( (i>>8)^(i>>4)^i ); 689 } 690 691 static int 692 schk_match(void *obj, uintptr_t key, int flags, void *_arg) 693 { 694 struct dn_schk *s = (struct dn_schk *)obj; 695 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 696 ((struct dn_schk *)key)->sch.sched_nr; 697 return (s->sch.sched_nr == i); 698 } 699 700 /* 701 * Create the entry and intialize with the sched hash if needed. 702 * Leave s->fp unset so we can tell whether a dn_ht_find() returns 703 * a new object or a previously existing one. 704 */ 705 static void * 706 schk_new(uintptr_t key, int flags, void *arg) 707 { 708 struct schk_new_arg *a = arg; 709 struct dn_schk *s; 710 int l = sizeof(*s) +a->fp->schk_datalen; 711 712 s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); 713 if (s == NULL) 714 return NULL; 715 set_oid(&s->link.oid, DN_LINK, sizeof(s->link)); 716 s->sch = *a->sch; // copy initial values 717 s->link.link_nr = s->sch.sched_nr; 718 SLIST_INIT(&s->fsk_list); 719 /* initialize the hash table or create the single instance */ 720 s->fp = a->fp; /* si_new needs this */ 721 s->drain_bucket = 0; 722 if (s->sch.flags & DN_HAVE_MASK) { 723 s->siht = dn_ht_init(NULL, s->sch.buckets, 724 offsetof(struct dn_sch_inst, si_next), 725 si_hash, si_match, si_new); 726 if (s->siht == NULL) { 727 free(s, M_DUMMYNET); 728 return NULL; 729 } 730 } 731 s->fp = NULL; /* mark as a new scheduler */ 732 dn_cfg.schk_count++; 733 return s; 734 } 735 736 /* 737 * Callback for sched delete. Notify all attached flowsets to 738 * detach from the scheduler, destroy the internal flowset, and 739 * all instances. The scheduler goes away too. 740 * arg is 0 (only detach flowsets and destroy instances) 741 * DN_DESTROY (detach & delete queues, delete schk) 742 * or DN_DELETE_FS (delete queues and flowsets, delete schk) 743 */ 744 static int 745 schk_delete_cb(void *obj, void *arg) 746 { 747 struct dn_schk *s = obj; 748 #if 0 749 int a = (int)arg; 750 ND("sched %d arg %s%s", 751 s->sch.sched_nr, 752 a&DN_DESTROY ? "DEL ":"", 753 a&DN_DELETE_FS ? "DEL_FS":""); 754 #endif 755 fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0); 756 /* no more flowset pointing to us now */ 757 if (s->sch.flags & DN_HAVE_MASK) { 758 dn_ht_scan(s->siht, si_destroy, NULL); 759 dn_ht_free(s->siht, 0); 760 } else if (s->siht) 761 si_destroy(s->siht, NULL); 762 if (s->profile) { 763 free(s->profile, M_DUMMYNET); 764 s->profile = NULL; 765 } 766 s->siht = NULL; 767 if (s->fp->destroy) 768 s->fp->destroy(s); 769 bzero(s, sizeof(*s)); // safety 770 free(obj, M_DUMMYNET); 771 dn_cfg.schk_count--; 772 return DNHT_SCAN_DEL; 773 } 774 775 /* 776 * called on a 'sched X delete' command. Deletes a single scheduler. 777 * This is done by removing from the schedhash, unlinking all 778 * flowsets and deleting their traffic. 779 */ 780 static int 781 delete_schk(int i) 782 { 783 struct dn_schk *s; 784 785 s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL); 786 ND("%d %p", i, s); 787 if (!s) 788 return EINVAL; 789 delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */ 790 /* then detach flowsets, delete traffic */ 791 schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY); 792 return 0; 793 } 794 /*--- end of schk hashtable support ---*/ 795 796 static int 797 copy_obj(char **start, char *end, void *_o, const char *msg, int i) 798 { 799 struct dn_id *o = _o; 800 int have = end - *start; 801 802 if (have < o->len || o->len == 0 || o->type == 0) { 803 D("(WARN) type %d %s %d have %d need %d", 804 o->type, msg, i, have, o->len); 805 return 1; 806 } 807 ND("type %d %s %d len %d", o->type, msg, i, o->len); 808 bcopy(_o, *start, o->len); 809 if (o->type == DN_LINK) { 810 /* Adjust burst parameter for link */ 811 struct dn_link *l = (struct dn_link *)*start; 812 l->burst = div64(l->burst, 8 * hz); 813 l->delay = l->delay * 1000 / hz; 814 } else if (o->type == DN_SCH) { 815 /* Set id->id to the number of instances */ 816 struct dn_schk *s = _o; 817 struct dn_id *id = (struct dn_id *)(*start); 818 id->id = (s->sch.flags & DN_HAVE_MASK) ? 819 dn_ht_entries(s->siht) : (s->siht ? 1 : 0); 820 } 821 *start += o->len; 822 return 0; 823 } 824 825 /* Specific function to copy a queue. 826 * Copies only the user-visible part of a queue (which is in 827 * a struct dn_flow), and sets len accordingly. 828 */ 829 static int 830 copy_obj_q(char **start, char *end, void *_o, const char *msg, int i) 831 { 832 struct dn_id *o = _o; 833 int have = end - *start; 834 int len = sizeof(struct dn_flow); /* see above comment */ 835 836 if (have < len || o->len == 0 || o->type != DN_QUEUE) { 837 D("ERROR type %d %s %d have %d need %d", 838 o->type, msg, i, have, len); 839 return 1; 840 } 841 ND("type %d %s %d len %d", o->type, msg, i, len); 842 bcopy(_o, *start, len); 843 ((struct dn_id*)(*start))->len = len; 844 *start += len; 845 return 0; 846 } 847 848 static int 849 copy_q_cb(void *obj, void *arg) 850 { 851 struct dn_queue *q = obj; 852 struct copy_args *a = arg; 853 struct dn_flow *ni = (struct dn_flow *)(*a->start); 854 if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1)) 855 return DNHT_SCAN_END; 856 ni->oid.type = DN_FLOW; /* override the DN_QUEUE */ 857 ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL); 858 return 0; 859 } 860 861 static int 862 copy_q(struct copy_args *a, struct dn_fsk *fs, int flags) 863 { 864 if (!fs->qht) 865 return 0; 866 if (fs->fs.flags & DN_QHT_HASH) 867 dn_ht_scan(fs->qht, copy_q_cb, a); 868 else 869 copy_q_cb(fs->qht, a); 870 return 0; 871 } 872 873 /* 874 * This routine only copies the initial part of a profile ? XXX 875 */ 876 static int 877 copy_profile(struct copy_args *a, struct dn_profile *p) 878 { 879 int have = a->end - *a->start; 880 /* XXX here we check for max length */ 881 int profile_len = sizeof(struct dn_profile) - 882 ED_MAX_SAMPLES_NO*sizeof(int); 883 884 if (p == NULL) 885 return 0; 886 if (have < profile_len) { 887 D("error have %d need %d", have, profile_len); 888 return 1; 889 } 890 bcopy(p, *a->start, profile_len); 891 ((struct dn_id *)(*a->start))->len = profile_len; 892 *a->start += profile_len; 893 return 0; 894 } 895 896 static int 897 copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags) 898 { 899 struct dn_fs *ufs = (struct dn_fs *)(*a->start); 900 if (!fs) 901 return 0; 902 ND("flowset %d", fs->fs.fs_nr); 903 if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr)) 904 return DNHT_SCAN_END; 905 ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ? 906 dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0); 907 if (flags) { /* copy queues */ 908 copy_q(a, fs, 0); 909 } 910 return 0; 911 } 912 913 static int 914 copy_si_cb(void *obj, void *arg) 915 { 916 struct dn_sch_inst *si = obj; 917 struct copy_args *a = arg; 918 struct dn_flow *ni = (struct dn_flow *)(*a->start); 919 if (copy_obj(a->start, a->end, &si->ni, "inst", 920 si->sched->sch.sched_nr)) 921 return DNHT_SCAN_END; 922 ni->oid.type = DN_FLOW; /* override the DN_SCH_I */ 923 ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL); 924 return 0; 925 } 926 927 static int 928 copy_si(struct copy_args *a, struct dn_schk *s, int flags) 929 { 930 if (s->sch.flags & DN_HAVE_MASK) 931 dn_ht_scan(s->siht, copy_si_cb, a); 932 else if (s->siht) 933 copy_si_cb(s->siht, a); 934 return 0; 935 } 936 937 /* 938 * compute a list of children of a scheduler and copy up 939 */ 940 static int 941 copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags) 942 { 943 struct dn_fsk *fs; 944 struct dn_id *o; 945 uint32_t *p; 946 947 int n = 0, space = sizeof(*o); 948 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 949 if (fs->fs.fs_nr < DN_MAX_ID) 950 n++; 951 } 952 space += n * sizeof(uint32_t); 953 DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n); 954 if (a->end - *(a->start) < space) 955 return DNHT_SCAN_END; 956 o = (struct dn_id *)(*(a->start)); 957 o->len = space; 958 *a->start += o->len; 959 o->type = DN_TEXT; 960 p = (uint32_t *)(o+1); 961 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) 962 if (fs->fs.fs_nr < DN_MAX_ID) 963 *p++ = fs->fs.fs_nr; 964 return 0; 965 } 966 967 static int 968 copy_data_helper(void *_o, void *_arg) 969 { 970 struct copy_args *a = _arg; 971 uint32_t *r = a->extra->r; /* start of first range */ 972 uint32_t *lim; /* first invalid pointer */ 973 int n; 974 975 lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len); 976 977 if (a->type == DN_LINK || a->type == DN_SCH) { 978 /* pipe|sched show, we receive a dn_schk */ 979 struct dn_schk *s = _o; 980 981 n = s->sch.sched_nr; 982 if (a->type == DN_SCH && n >= DN_MAX_ID) 983 return 0; /* not a scheduler */ 984 if (a->type == DN_LINK && n <= DN_MAX_ID) 985 return 0; /* not a pipe */ 986 987 /* see if the object is within one of our ranges */ 988 for (;r < lim; r += 2) { 989 if (n < r[0] || n > r[1]) 990 continue; 991 /* Found a valid entry, copy and we are done */ 992 if (a->flags & DN_C_LINK) { 993 if (copy_obj(a->start, a->end, 994 &s->link, "link", n)) 995 return DNHT_SCAN_END; 996 if (copy_profile(a, s->profile)) 997 return DNHT_SCAN_END; 998 if (copy_flowset(a, s->fs, 0)) 999 return DNHT_SCAN_END; 1000 } 1001 if (a->flags & DN_C_SCH) { 1002 if (copy_obj(a->start, a->end, 1003 &s->sch, "sched", n)) 1004 return DNHT_SCAN_END; 1005 /* list all attached flowsets */ 1006 if (copy_fsk_list(a, s, 0)) 1007 return DNHT_SCAN_END; 1008 } 1009 if (a->flags & DN_C_FLOW) 1010 copy_si(a, s, 0); 1011 break; 1012 } 1013 } else if (a->type == DN_FS) { 1014 /* queue show, skip internal flowsets */ 1015 struct dn_fsk *fs = _o; 1016 1017 n = fs->fs.fs_nr; 1018 if (n >= DN_MAX_ID) 1019 return 0; 1020 /* see if the object is within one of our ranges */ 1021 for (;r < lim; r += 2) { 1022 if (n < r[0] || n > r[1]) 1023 continue; 1024 if (copy_flowset(a, fs, 0)) 1025 return DNHT_SCAN_END; 1026 copy_q(a, fs, 0); 1027 break; /* we are done */ 1028 } 1029 } 1030 return 0; 1031 } 1032 1033 static inline struct dn_schk * 1034 locate_scheduler(int i) 1035 { 1036 return dn_ht_find(dn_cfg.schedhash, i, 0, NULL); 1037 } 1038 1039 /* 1040 * red parameters are in fixed point arithmetic. 1041 */ 1042 static int 1043 config_red(struct dn_fsk *fs) 1044 { 1045 int64_t s, idle, weight, w0; 1046 int t, i; 1047 1048 fs->w_q = fs->fs.w_q; 1049 fs->max_p = fs->fs.max_p; 1050 ND("called"); 1051 /* Doing stuff that was in userland */ 1052 i = fs->sched->link.bandwidth; 1053 s = (i <= 0) ? 0 : 1054 hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i; 1055 1056 idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */ 1057 fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth); 1058 /* fs->lookup_step not scaled, */ 1059 if (!fs->lookup_step) 1060 fs->lookup_step = 1; 1061 w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled 1062 1063 for (t = fs->lookup_step; t > 1; --t) 1064 weight = SCALE_MUL(weight, w0); 1065 fs->lookup_weight = (int)(weight); // scaled 1066 1067 /* Now doing stuff that was in kerneland */ 1068 fs->min_th = SCALE(fs->fs.min_th); 1069 fs->max_th = SCALE(fs->fs.max_th); 1070 1071 fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th); 1072 fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th)); 1073 1074 if (fs->fs.flags & DN_IS_GENTLE_RED) { 1075 fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th; 1076 fs->c_4 = SCALE(1) - 2 * fs->max_p; 1077 } 1078 1079 /* If the lookup table already exist, free and create it again. */ 1080 if (fs->w_q_lookup) { 1081 free(fs->w_q_lookup, M_DUMMYNET); 1082 fs->w_q_lookup = NULL; 1083 } 1084 if (dn_cfg.red_lookup_depth == 0) { 1085 printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth" 1086 "must be > 0\n"); 1087 fs->fs.flags &= ~DN_IS_RED; 1088 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1089 return (EINVAL); 1090 } 1091 fs->lookup_depth = dn_cfg.red_lookup_depth; 1092 fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int), 1093 M_DUMMYNET, M_NOWAIT); 1094 if (fs->w_q_lookup == NULL) { 1095 printf("dummynet: sorry, cannot allocate red lookup table\n"); 1096 fs->fs.flags &= ~DN_IS_RED; 1097 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1098 return(ENOSPC); 1099 } 1100 1101 /* Fill the lookup table with (1 - w_q)^x */ 1102 fs->w_q_lookup[0] = SCALE(1) - fs->w_q; 1103 1104 for (i = 1; i < fs->lookup_depth; i++) 1105 fs->w_q_lookup[i] = 1106 SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight); 1107 1108 if (dn_cfg.red_avg_pkt_size < 1) 1109 dn_cfg.red_avg_pkt_size = 512; 1110 fs->avg_pkt_size = dn_cfg.red_avg_pkt_size; 1111 if (dn_cfg.red_max_pkt_size < 1) 1112 dn_cfg.red_max_pkt_size = 1500; 1113 fs->max_pkt_size = dn_cfg.red_max_pkt_size; 1114 ND("exit"); 1115 return 0; 1116 } 1117 1118 /* Scan all flowset attached to this scheduler and update red */ 1119 static void 1120 update_red(struct dn_schk *s) 1121 { 1122 struct dn_fsk *fs; 1123 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 1124 if (fs && (fs->fs.flags & DN_IS_RED)) 1125 config_red(fs); 1126 } 1127 } 1128 1129 /* attach flowset to scheduler s, possibly requeue */ 1130 static void 1131 fsk_attach(struct dn_fsk *fs, struct dn_schk *s) 1132 { 1133 ND("remove fs %d from fsunlinked, link to sched %d", 1134 fs->fs.fs_nr, s->sch.sched_nr); 1135 SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain); 1136 fs->sched = s; 1137 SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain); 1138 if (s->fp->new_fsk) 1139 s->fp->new_fsk(fs); 1140 /* XXX compute fsk_mask */ 1141 fs->fsk_mask = fs->fs.flow_mask; 1142 if (fs->sched->sch.flags & DN_HAVE_MASK) 1143 flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask); 1144 if (fs->qht) { 1145 /* 1146 * we must drain qht according to the old 1147 * type, and reinsert according to the new one. 1148 * The requeue is complex -- in general we need to 1149 * reclassify every single packet. 1150 * For the time being, let's hope qht is never set 1151 * when we reach this point. 1152 */ 1153 D("XXX TODO requeue from fs %d to sch %d", 1154 fs->fs.fs_nr, s->sch.sched_nr); 1155 fs->qht = NULL; 1156 } 1157 /* set the new type for qht */ 1158 if (nonzero_mask(&fs->fsk_mask)) 1159 fs->fs.flags |= DN_QHT_HASH; 1160 else 1161 fs->fs.flags &= ~DN_QHT_HASH; 1162 1163 /* XXX config_red() can fail... */ 1164 if (fs->fs.flags & DN_IS_RED) 1165 config_red(fs); 1166 } 1167 1168 /* update all flowsets which may refer to this scheduler */ 1169 static void 1170 update_fs(struct dn_schk *s) 1171 { 1172 struct dn_fsk *fs, *tmp; 1173 1174 SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) { 1175 if (s->sch.sched_nr != fs->fs.sched_nr) { 1176 D("fs %d for sch %d not %d still unlinked", 1177 fs->fs.fs_nr, fs->fs.sched_nr, 1178 s->sch.sched_nr); 1179 continue; 1180 } 1181 fsk_attach(fs, s); 1182 } 1183 } 1184 1185 /* 1186 * Configuration -- to preserve backward compatibility we use 1187 * the following scheme (N is 65536) 1188 * NUMBER SCHED LINK FLOWSET 1189 * 1 .. N-1 (1)WFQ (2)WFQ (3)queue 1190 * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1 1191 * 2N+1 .. 3N-1 -- -- (7)FIFO for sched N+1..2N-1 1192 * 1193 * "pipe i config" configures #1, #2 and #3 1194 * "sched i config" configures #1 and possibly #6 1195 * "queue i config" configures #3 1196 * #1 is configured with 'pipe i config' or 'sched i config' 1197 * #2 is configured with 'pipe i config', and created if not 1198 * existing with 'sched i config' 1199 * #3 is configured with 'queue i config' 1200 * #4 is automatically configured after #1, can only be FIFO 1201 * #5 is automatically configured after #2 1202 * #6 is automatically created when #1 is !MULTIQUEUE, 1203 * and can be updated. 1204 * #7 is automatically configured after #2 1205 */ 1206 1207 /* 1208 * configure a link (and its FIFO instance) 1209 */ 1210 static int 1211 config_link(struct dn_link *p, struct dn_id *arg) 1212 { 1213 int i; 1214 1215 if (p->oid.len != sizeof(*p)) { 1216 D("invalid pipe len %d", p->oid.len); 1217 return EINVAL; 1218 } 1219 i = p->link_nr; 1220 if (i <= 0 || i >= DN_MAX_ID) 1221 return EINVAL; 1222 /* 1223 * The config program passes parameters as follows: 1224 * bw = bits/second (0 means no limits), 1225 * delay = ms, must be translated into ticks. 1226 * qsize = slots/bytes 1227 * burst ??? 1228 */ 1229 p->delay = (p->delay * hz) / 1000; 1230 /* Scale burst size: bytes -> bits * hz */ 1231 p->burst *= 8 * hz; 1232 1233 DN_BH_WLOCK(); 1234 /* do it twice, base link and FIFO link */ 1235 for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { 1236 struct dn_schk *s = locate_scheduler(i); 1237 if (s == NULL) { 1238 DN_BH_WUNLOCK(); 1239 D("sched %d not found", i); 1240 return EINVAL; 1241 } 1242 /* remove profile if exists */ 1243 if (s->profile) { 1244 free(s->profile, M_DUMMYNET); 1245 s->profile = NULL; 1246 } 1247 /* copy all parameters */ 1248 s->link.oid = p->oid; 1249 s->link.link_nr = i; 1250 s->link.delay = p->delay; 1251 if (s->link.bandwidth != p->bandwidth) { 1252 /* XXX bandwidth changes, need to update red params */ 1253 s->link.bandwidth = p->bandwidth; 1254 update_red(s); 1255 } 1256 s->link.burst = p->burst; 1257 schk_reset_credit(s); 1258 } 1259 dn_cfg.id++; 1260 DN_BH_WUNLOCK(); 1261 return 0; 1262 } 1263 1264 /* 1265 * configure a flowset. Can be called from inside with locked=1, 1266 */ 1267 static struct dn_fsk * 1268 config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked) 1269 { 1270 int i; 1271 struct dn_fsk *fs; 1272 1273 if (nfs->oid.len != sizeof(*nfs)) { 1274 D("invalid flowset len %d", nfs->oid.len); 1275 return NULL; 1276 } 1277 i = nfs->fs_nr; 1278 if (i <= 0 || i >= 3*DN_MAX_ID) 1279 return NULL; 1280 ND("flowset %d", i); 1281 /* XXX other sanity checks */ 1282 if (nfs->flags & DN_QSIZE_BYTES) { 1283 ipdn_bound_var(&nfs->qsize, 16384, 1284 1500, dn_cfg.byte_limit, NULL); // "queue byte size"); 1285 } else { 1286 ipdn_bound_var(&nfs->qsize, 50, 1287 1, dn_cfg.slot_limit, NULL); // "queue slot size"); 1288 } 1289 if (nfs->flags & DN_HAVE_MASK) { 1290 /* make sure we have some buckets */ 1291 ipdn_bound_var((int *)&nfs->buckets, dn_cfg.hash_size, 1292 1, dn_cfg.max_hash_size, "flowset buckets"); 1293 } else { 1294 nfs->buckets = 1; /* we only need 1 */ 1295 } 1296 if (!locked) 1297 DN_BH_WLOCK(); 1298 do { /* exit with break when done */ 1299 struct dn_schk *s; 1300 int flags = nfs->sched_nr ? DNHT_INSERT : 0; 1301 int j; 1302 int oldc = dn_cfg.fsk_count; 1303 fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL); 1304 if (fs == NULL) { 1305 D("missing sched for flowset %d", i); 1306 break; 1307 } 1308 /* grab some defaults from the existing one */ 1309 if (nfs->sched_nr == 0) /* reuse */ 1310 nfs->sched_nr = fs->fs.sched_nr; 1311 for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) { 1312 if (nfs->par[j] == -1) /* reuse */ 1313 nfs->par[j] = fs->fs.par[j]; 1314 } 1315 if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) { 1316 ND("flowset %d unchanged", i); 1317 break; /* no change, nothing to do */ 1318 } 1319 if (oldc != dn_cfg.fsk_count) /* new item */ 1320 dn_cfg.id++; 1321 s = locate_scheduler(nfs->sched_nr); 1322 /* detach from old scheduler if needed, preserving 1323 * queues if we need to reattach. Then update the 1324 * configuration, and possibly attach to the new sched. 1325 */ 1326 DX(2, "fs %d changed sched %d@%p to %d@%p", 1327 fs->fs.fs_nr, 1328 fs->fs.sched_nr, fs->sched, nfs->sched_nr, s); 1329 if (fs->sched) { 1330 int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY); 1331 flags |= DN_DESTROY; /* XXX temporary */ 1332 fsk_detach(fs, flags); 1333 } 1334 fs->fs = *nfs; /* copy configuration */ 1335 if (s != NULL) 1336 fsk_attach(fs, s); 1337 } while (0); 1338 if (!locked) 1339 DN_BH_WUNLOCK(); 1340 return fs; 1341 } 1342 1343 /* 1344 * config/reconfig a scheduler and its FIFO variant. 1345 * For !MULTIQUEUE schedulers, also set up the flowset. 1346 * 1347 * On reconfigurations (detected because s->fp is set), 1348 * detach existing flowsets preserving traffic, preserve link, 1349 * and delete the old scheduler creating a new one. 1350 */ 1351 static int 1352 config_sched(struct dn_sch *_nsch, struct dn_id *arg) 1353 { 1354 struct dn_schk *s; 1355 struct schk_new_arg a; /* argument for schk_new */ 1356 int i; 1357 struct dn_link p; /* copy of oldlink */ 1358 struct dn_profile *pf = NULL; /* copy of old link profile */ 1359 /* Used to preserv mask parameter */ 1360 struct ipfw_flow_id new_mask; 1361 int new_buckets = 0; 1362 int new_flags = 0; 1363 int pipe_cmd; 1364 int err = ENOMEM; 1365 1366 a.sch = _nsch; 1367 if (a.sch->oid.len != sizeof(*a.sch)) { 1368 D("bad sched len %d", a.sch->oid.len); 1369 return EINVAL; 1370 } 1371 i = a.sch->sched_nr; 1372 if (i <= 0 || i >= DN_MAX_ID) 1373 return EINVAL; 1374 /* make sure we have some buckets */ 1375 if (a.sch->flags & DN_HAVE_MASK) 1376 ipdn_bound_var((int *)&a.sch->buckets, dn_cfg.hash_size, 1377 1, dn_cfg.max_hash_size, "sched buckets"); 1378 /* XXX other sanity checks */ 1379 bzero(&p, sizeof(p)); 1380 1381 pipe_cmd = a.sch->flags & DN_PIPE_CMD; 1382 a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set? 1383 if (pipe_cmd) { 1384 /* Copy mask parameter */ 1385 new_mask = a.sch->sched_mask; 1386 new_buckets = a.sch->buckets; 1387 new_flags = a.sch->flags; 1388 } 1389 DN_BH_WLOCK(); 1390 again: /* run twice, for wfq and fifo */ 1391 /* 1392 * lookup the type. If not supplied, use the previous one 1393 * or default to WF2Q+. Otherwise, return an error. 1394 */ 1395 dn_cfg.id++; 1396 a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name); 1397 if (a.fp != NULL) { 1398 /* found. Lookup or create entry */ 1399 s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a); 1400 } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) { 1401 /* No type. search existing s* or retry with WF2Q+ */ 1402 s = dn_ht_find(dn_cfg.schedhash, i, 0, &a); 1403 if (s != NULL) { 1404 a.fp = s->fp; 1405 /* Scheduler exists, skip to FIFO scheduler 1406 * if command was pipe config... 1407 */ 1408 if (pipe_cmd) 1409 goto next; 1410 } else { 1411 /* New scheduler, create a wf2q+ with no mask 1412 * if command was pipe config... 1413 */ 1414 if (pipe_cmd) { 1415 /* clear mask parameter */ 1416 bzero(&a.sch->sched_mask, sizeof(new_mask)); 1417 a.sch->buckets = 0; 1418 a.sch->flags &= ~DN_HAVE_MASK; 1419 } 1420 a.sch->oid.subtype = DN_SCHED_WF2QP; 1421 goto again; 1422 } 1423 } else { 1424 D("invalid scheduler type %d %s", 1425 a.sch->oid.subtype, a.sch->name); 1426 err = EINVAL; 1427 goto error; 1428 } 1429 /* normalize name and subtype */ 1430 a.sch->oid.subtype = a.fp->type; 1431 bzero(a.sch->name, sizeof(a.sch->name)); 1432 strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name)); 1433 if (s == NULL) { 1434 D("cannot allocate scheduler %d", i); 1435 goto error; 1436 } 1437 /* restore existing link if any */ 1438 if (p.link_nr) { 1439 s->link = p; 1440 if (!pf || pf->link_nr != p.link_nr) { /* no saved value */ 1441 s->profile = NULL; /* XXX maybe not needed */ 1442 } else { 1443 s->profile = malloc(sizeof(struct dn_profile), 1444 M_DUMMYNET, M_NOWAIT | M_ZERO); 1445 if (s->profile == NULL) { 1446 D("cannot allocate profile"); 1447 goto error; //XXX 1448 } 1449 bcopy(pf, s->profile, sizeof(*pf)); 1450 } 1451 } 1452 p.link_nr = 0; 1453 if (s->fp == NULL) { 1454 DX(2, "sched %d new type %s", i, a.fp->name); 1455 } else if (s->fp != a.fp || 1456 bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) { 1457 /* already existing. */ 1458 DX(2, "sched %d type changed from %s to %s", 1459 i, s->fp->name, a.fp->name); 1460 DX(4, " type/sub %d/%d -> %d/%d", 1461 s->sch.oid.type, s->sch.oid.subtype, 1462 a.sch->oid.type, a.sch->oid.subtype); 1463 if (s->link.link_nr == 0) 1464 D("XXX WARNING link 0 for sched %d", i); 1465 p = s->link; /* preserve link */ 1466 if (s->profile) {/* preserve profile */ 1467 if (!pf) 1468 pf = malloc(sizeof(*pf), 1469 M_DUMMYNET, M_NOWAIT | M_ZERO); 1470 if (pf) /* XXX should issue a warning otherwise */ 1471 bcopy(s->profile, pf, sizeof(*pf)); 1472 } 1473 /* remove from the hash */ 1474 dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL); 1475 /* Detach flowsets, preserve queues. */ 1476 // schk_delete_cb(s, NULL); 1477 // XXX temporarily, kill queues 1478 schk_delete_cb(s, (void *)DN_DESTROY); 1479 goto again; 1480 } else { 1481 DX(4, "sched %d unchanged type %s", i, a.fp->name); 1482 } 1483 /* complete initialization */ 1484 s->sch = *a.sch; 1485 s->fp = a.fp; 1486 s->cfg = arg; 1487 // XXX schk_reset_credit(s); 1488 /* create the internal flowset if needed, 1489 * trying to reuse existing ones if available 1490 */ 1491 if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) { 1492 s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL); 1493 if (!s->fs) { 1494 struct dn_fs fs; 1495 bzero(&fs, sizeof(fs)); 1496 set_oid(&fs.oid, DN_FS, sizeof(fs)); 1497 fs.fs_nr = i + DN_MAX_ID; 1498 fs.sched_nr = i; 1499 s->fs = config_fs(&fs, NULL, 1 /* locked */); 1500 } 1501 if (!s->fs) { 1502 schk_delete_cb(s, (void *)DN_DESTROY); 1503 D("error creating internal fs for %d", i); 1504 goto error; 1505 } 1506 } 1507 /* call init function after the flowset is created */ 1508 if (s->fp->config) 1509 s->fp->config(s); 1510 update_fs(s); 1511 next: 1512 if (i < DN_MAX_ID) { /* now configure the FIFO instance */ 1513 i += DN_MAX_ID; 1514 if (pipe_cmd) { 1515 /* Restore mask parameter for FIFO */ 1516 a.sch->sched_mask = new_mask; 1517 a.sch->buckets = new_buckets; 1518 a.sch->flags = new_flags; 1519 } else { 1520 /* sched config shouldn't modify the FIFO scheduler */ 1521 if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) { 1522 /* FIFO already exist, don't touch it */ 1523 err = 0; /* and this is not an error */ 1524 goto error; 1525 } 1526 } 1527 a.sch->sched_nr = i; 1528 a.sch->oid.subtype = DN_SCHED_FIFO; 1529 bzero(a.sch->name, sizeof(a.sch->name)); 1530 goto again; 1531 } 1532 err = 0; 1533 error: 1534 DN_BH_WUNLOCK(); 1535 if (pf) 1536 free(pf, M_DUMMYNET); 1537 return err; 1538 } 1539 1540 /* 1541 * attach a profile to a link 1542 */ 1543 static int 1544 config_profile(struct dn_profile *pf, struct dn_id *arg) 1545 { 1546 struct dn_schk *s; 1547 int i, olen, err = 0; 1548 1549 if (pf->oid.len < sizeof(*pf)) { 1550 D("short profile len %d", pf->oid.len); 1551 return EINVAL; 1552 } 1553 i = pf->link_nr; 1554 if (i <= 0 || i >= DN_MAX_ID) 1555 return EINVAL; 1556 /* XXX other sanity checks */ 1557 DN_BH_WLOCK(); 1558 for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { 1559 s = locate_scheduler(i); 1560 1561 if (s == NULL) { 1562 err = EINVAL; 1563 break; 1564 } 1565 dn_cfg.id++; 1566 /* 1567 * If we had a profile and the new one does not fit, 1568 * or it is deleted, then we need to free memory. 1569 */ 1570 if (s->profile && (pf->samples_no == 0 || 1571 s->profile->oid.len < pf->oid.len)) { 1572 free(s->profile, M_DUMMYNET); 1573 s->profile = NULL; 1574 } 1575 if (pf->samples_no == 0) 1576 continue; 1577 /* 1578 * new profile, possibly allocate memory 1579 * and copy data. 1580 */ 1581 if (s->profile == NULL) 1582 s->profile = malloc(pf->oid.len, 1583 M_DUMMYNET, M_NOWAIT | M_ZERO); 1584 if (s->profile == NULL) { 1585 D("no memory for profile %d", i); 1586 err = ENOMEM; 1587 break; 1588 } 1589 /* preserve larger length XXX double check */ 1590 olen = s->profile->oid.len; 1591 if (olen < pf->oid.len) 1592 olen = pf->oid.len; 1593 bcopy(pf, s->profile, pf->oid.len); 1594 s->profile->oid.len = olen; 1595 } 1596 DN_BH_WUNLOCK(); 1597 return err; 1598 } 1599 1600 /* 1601 * Delete all objects: 1602 */ 1603 static void 1604 dummynet_flush(void) 1605 { 1606 1607 /* delete all schedulers and related links/queues/flowsets */ 1608 dn_ht_scan(dn_cfg.schedhash, schk_delete_cb, 1609 (void *)(uintptr_t)DN_DELETE_FS); 1610 /* delete all remaining (unlinked) flowsets */ 1611 DX(4, "still %d unlinked fs", dn_cfg.fsk_count); 1612 dn_ht_free(dn_cfg.fshash, DNHT_REMOVE); 1613 fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS); 1614 /* Reinitialize system heap... */ 1615 heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id)); 1616 } 1617 1618 /* 1619 * Main handler for configuration. We are guaranteed to be called 1620 * with an oid which is at least a dn_id. 1621 * - the first object is the command (config, delete, flush, ...) 1622 * - config_link must be issued after the corresponding config_sched 1623 * - parameters (DN_TXT) for an object must preceed the object 1624 * processed on a config_sched. 1625 */ 1626 int 1627 do_config(void *p, int l) 1628 { 1629 struct dn_id *next, *o; 1630 int err = 0, err2 = 0; 1631 struct dn_id *arg = NULL; 1632 uintptr_t *a; 1633 1634 o = p; 1635 if (o->id != DN_API_VERSION) { 1636 D("invalid api version got %d need %d", 1637 o->id, DN_API_VERSION); 1638 return EINVAL; 1639 } 1640 for (; l >= sizeof(*o); o = next) { 1641 struct dn_id *prev = arg; 1642 if (o->len < sizeof(*o) || l < o->len) { 1643 D("bad len o->len %d len %d", o->len, l); 1644 err = EINVAL; 1645 break; 1646 } 1647 l -= o->len; 1648 next = (struct dn_id *)((char *)o + o->len); 1649 err = 0; 1650 switch (o->type) { 1651 default: 1652 D("cmd %d not implemented", o->type); 1653 break; 1654 1655 #ifdef EMULATE_SYSCTL 1656 /* sysctl emulation. 1657 * if we recognize the command, jump to the correct 1658 * handler and return 1659 */ 1660 case DN_SYSCTL_SET: 1661 err = kesysctl_emu_set(p, l); 1662 return err; 1663 #endif 1664 1665 case DN_CMD_CONFIG: /* simply a header */ 1666 break; 1667 1668 case DN_CMD_DELETE: 1669 /* the argument is in the first uintptr_t after o */ 1670 a = (uintptr_t *)(o+1); 1671 if (o->len < sizeof(*o) + sizeof(*a)) { 1672 err = EINVAL; 1673 break; 1674 } 1675 switch (o->subtype) { 1676 case DN_LINK: 1677 /* delete base and derived schedulers */ 1678 DN_BH_WLOCK(); 1679 err = delete_schk(*a); 1680 err2 = delete_schk(*a + DN_MAX_ID); 1681 DN_BH_WUNLOCK(); 1682 if (!err) 1683 err = err2; 1684 break; 1685 1686 default: 1687 D("invalid delete type %d", 1688 o->subtype); 1689 err = EINVAL; 1690 break; 1691 1692 case DN_FS: 1693 err = (*a <1 || *a >= DN_MAX_ID) ? 1694 EINVAL : delete_fs(*a, 0) ; 1695 break; 1696 } 1697 break; 1698 1699 case DN_CMD_FLUSH: 1700 DN_BH_WLOCK(); 1701 dummynet_flush(); 1702 DN_BH_WUNLOCK(); 1703 break; 1704 case DN_TEXT: /* store argument the next block */ 1705 prev = NULL; 1706 arg = o; 1707 break; 1708 case DN_LINK: 1709 err = config_link((struct dn_link *)o, arg); 1710 break; 1711 case DN_PROFILE: 1712 err = config_profile((struct dn_profile *)o, arg); 1713 break; 1714 case DN_SCH: 1715 err = config_sched((struct dn_sch *)o, arg); 1716 break; 1717 case DN_FS: 1718 err = (NULL==config_fs((struct dn_fs *)o, arg, 0)); 1719 break; 1720 } 1721 if (prev) 1722 arg = NULL; 1723 if (err != 0) 1724 break; 1725 } 1726 return err; 1727 } 1728 1729 static int 1730 compute_space(struct dn_id *cmd, struct copy_args *a) 1731 { 1732 int x = 0, need = 0; 1733 int profile_size = sizeof(struct dn_profile) - 1734 ED_MAX_SAMPLES_NO*sizeof(int); 1735 1736 /* NOTE about compute space: 1737 * NP = dn_cfg.schk_count 1738 * NSI = dn_cfg.si_count 1739 * NF = dn_cfg.fsk_count 1740 * NQ = dn_cfg.queue_count 1741 * - ipfw pipe show 1742 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 1743 * link, scheduler template, flowset 1744 * integrated in scheduler and header 1745 * for flowset list 1746 * (NSI)*(dn_flow) all scheduler instance (includes 1747 * the queue instance) 1748 * - ipfw sched show 1749 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 1750 * link, scheduler template, flowset 1751 * integrated in scheduler and header 1752 * for flowset list 1753 * (NSI * dn_flow) all scheduler instances 1754 * (NF * sizeof(uint_32)) space for flowset list linked to scheduler 1755 * (NQ * dn_queue) all queue [XXXfor now not listed] 1756 * - ipfw queue show 1757 * (NF * dn_fs) all flowset 1758 * (NQ * dn_queue) all queues 1759 */ 1760 switch (cmd->subtype) { 1761 default: 1762 return -1; 1763 /* XXX where do LINK and SCH differ ? */ 1764 /* 'ipfw sched show' could list all queues associated to 1765 * a scheduler. This feature for now is disabled 1766 */ 1767 case DN_LINK: /* pipe show */ 1768 x = DN_C_LINK | DN_C_SCH | DN_C_FLOW; 1769 need += dn_cfg.schk_count * 1770 (sizeof(struct dn_fs) + profile_size) / 2; 1771 need += dn_cfg.fsk_count * sizeof(uint32_t); 1772 break; 1773 case DN_SCH: /* sched show */ 1774 need += dn_cfg.schk_count * 1775 (sizeof(struct dn_fs) + profile_size) / 2; 1776 need += dn_cfg.fsk_count * sizeof(uint32_t); 1777 x = DN_C_SCH | DN_C_LINK | DN_C_FLOW; 1778 break; 1779 case DN_FS: /* queue show */ 1780 x = DN_C_FS | DN_C_QUEUE; 1781 break; 1782 case DN_GET_COMPAT: /* compatibility mode */ 1783 need = dn_compat_calc_size(); 1784 break; 1785 } 1786 a->flags = x; 1787 if (x & DN_C_SCH) { 1788 need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2; 1789 /* NOT also, each fs might be attached to a sched */ 1790 need += dn_cfg.schk_count * sizeof(struct dn_id) / 2; 1791 } 1792 if (x & DN_C_FS) 1793 need += dn_cfg.fsk_count * sizeof(struct dn_fs); 1794 if (x & DN_C_LINK) { 1795 need += dn_cfg.schk_count * sizeof(struct dn_link) / 2; 1796 } 1797 /* 1798 * When exporting a queue to userland, only pass up the 1799 * struct dn_flow, which is the only visible part. 1800 */ 1801 1802 if (x & DN_C_QUEUE) 1803 need += dn_cfg.queue_count * sizeof(struct dn_flow); 1804 if (x & DN_C_FLOW) 1805 need += dn_cfg.si_count * (sizeof(struct dn_flow)); 1806 return need; 1807 } 1808 1809 /* 1810 * If compat != NULL dummynet_get is called in compatibility mode. 1811 * *compat will be the pointer to the buffer to pass to ipfw 1812 */ 1813 int 1814 dummynet_get(struct sockopt *sopt, void **compat) 1815 { 1816 int have, i, need, error; 1817 char *start = NULL, *buf; 1818 size_t sopt_valsize; 1819 struct dn_id *cmd; 1820 struct copy_args a; 1821 struct copy_range r; 1822 int l = sizeof(struct dn_id); 1823 1824 bzero(&a, sizeof(a)); 1825 bzero(&r, sizeof(r)); 1826 1827 /* save and restore original sopt_valsize around copyin */ 1828 sopt_valsize = sopt->sopt_valsize; 1829 1830 cmd = &r.o; 1831 1832 if (!compat) { 1833 /* copy at least an oid, and possibly a full object */ 1834 error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); 1835 sopt->sopt_valsize = sopt_valsize; 1836 if (error) 1837 goto done; 1838 l = cmd->len; 1839 #ifdef EMULATE_SYSCTL 1840 /* sysctl emulation. */ 1841 if (cmd->type == DN_SYSCTL_GET) 1842 return kesysctl_emu_get(sopt); 1843 #endif 1844 if (l > sizeof(r)) { 1845 /* request larger than default, allocate buffer */ 1846 cmd = malloc(l, M_DUMMYNET, M_WAITOK); 1847 error = sooptcopyin(sopt, cmd, l, l); 1848 sopt->sopt_valsize = sopt_valsize; 1849 if (error) 1850 goto done; 1851 } 1852 } else { /* compatibility */ 1853 error = 0; 1854 cmd->type = DN_CMD_GET; 1855 cmd->len = sizeof(struct dn_id); 1856 cmd->subtype = DN_GET_COMPAT; 1857 // cmd->id = sopt_valsize; 1858 D("compatibility mode"); 1859 } 1860 a.extra = (struct copy_range *)cmd; 1861 if (cmd->len == sizeof(*cmd)) { /* no range, create a default */ 1862 uint32_t *rp = (uint32_t *)(cmd + 1); 1863 cmd->len += 2* sizeof(uint32_t); 1864 rp[0] = 1; 1865 rp[1] = DN_MAX_ID - 1; 1866 if (cmd->subtype == DN_LINK) { 1867 rp[0] += DN_MAX_ID; 1868 rp[1] += DN_MAX_ID; 1869 } 1870 } 1871 /* Count space (under lock) and allocate (outside lock). 1872 * Exit with lock held if we manage to get enough buffer. 1873 * Try a few times then give up. 1874 */ 1875 for (have = 0, i = 0; i < 10; i++) { 1876 DN_BH_WLOCK(); 1877 need = compute_space(cmd, &a); 1878 1879 /* if there is a range, ignore value from compute_space() */ 1880 if (l > sizeof(*cmd)) 1881 need = sopt_valsize - sizeof(*cmd); 1882 1883 if (need < 0) { 1884 DN_BH_WUNLOCK(); 1885 error = EINVAL; 1886 goto done; 1887 } 1888 need += sizeof(*cmd); 1889 cmd->id = need; 1890 if (have >= need) 1891 break; 1892 1893 DN_BH_WUNLOCK(); 1894 if (start) 1895 free(start, M_DUMMYNET); 1896 start = NULL; 1897 if (need > sopt_valsize) 1898 break; 1899 1900 have = need; 1901 start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO); 1902 } 1903 1904 if (start == NULL) { 1905 if (compat) { 1906 *compat = NULL; 1907 error = 1; // XXX 1908 } else { 1909 error = sooptcopyout(sopt, cmd, sizeof(*cmd)); 1910 } 1911 goto done; 1912 } 1913 ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, " 1914 "%d:%d si %d, %d:%d queues %d", 1915 dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH, 1916 dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK, 1917 dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS, 1918 dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I, 1919 dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE); 1920 sopt->sopt_valsize = sopt_valsize; 1921 a.type = cmd->subtype; 1922 1923 if (compat == NULL) { 1924 bcopy(cmd, start, sizeof(*cmd)); 1925 ((struct dn_id*)(start))->len = sizeof(struct dn_id); 1926 buf = start + sizeof(*cmd); 1927 } else 1928 buf = start; 1929 a.start = &buf; 1930 a.end = start + have; 1931 /* start copying other objects */ 1932 if (compat) { 1933 a.type = DN_COMPAT_PIPE; 1934 dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a); 1935 a.type = DN_COMPAT_QUEUE; 1936 dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a); 1937 } else if (a.type == DN_FS) { 1938 dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a); 1939 } else { 1940 dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a); 1941 } 1942 DN_BH_WUNLOCK(); 1943 1944 if (compat) { 1945 *compat = start; 1946 sopt->sopt_valsize = buf - start; 1947 /* free() is done by ip_dummynet_compat() */ 1948 start = NULL; //XXX hack 1949 } else { 1950 error = sooptcopyout(sopt, start, buf - start); 1951 } 1952 done: 1953 if (cmd && cmd != &r.o) 1954 free(cmd, M_DUMMYNET); 1955 if (start) 1956 free(start, M_DUMMYNET); 1957 return error; 1958 } 1959 1960 /* Callback called on scheduler instance to delete it if idle */ 1961 static int 1962 drain_scheduler_cb(void *_si, void *arg) 1963 { 1964 struct dn_sch_inst *si = _si; 1965 1966 if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL) 1967 return 0; 1968 1969 if (si->sched->fp->flags & DN_MULTIQUEUE) { 1970 if (si->q_count == 0) 1971 return si_destroy(si, NULL); 1972 else 1973 return 0; 1974 } else { /* !DN_MULTIQUEUE */ 1975 if ((si+1)->ni.length == 0) 1976 return si_destroy(si, NULL); 1977 else 1978 return 0; 1979 } 1980 return 0; /* unreachable */ 1981 } 1982 1983 /* Callback called on scheduler to check if it has instances */ 1984 static int 1985 drain_scheduler_sch_cb(void *_s, void *arg) 1986 { 1987 struct dn_schk *s = _s; 1988 1989 if (s->sch.flags & DN_HAVE_MASK) { 1990 dn_ht_scan_bucket(s->siht, &s->drain_bucket, 1991 drain_scheduler_cb, NULL); 1992 s->drain_bucket++; 1993 } else { 1994 if (s->siht) { 1995 if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL) 1996 s->siht = NULL; 1997 } 1998 } 1999 return 0; 2000 } 2001 2002 /* Called every tick, try to delete a 'bucket' of scheduler */ 2003 void 2004 dn_drain_scheduler(void) 2005 { 2006 dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch, 2007 drain_scheduler_sch_cb, NULL); 2008 dn_cfg.drain_sch++; 2009 } 2010 2011 /* Callback called on queue to delete if it is idle */ 2012 static int 2013 drain_queue_cb(void *_q, void *arg) 2014 { 2015 struct dn_queue *q = _q; 2016 2017 if (q->ni.length == 0) { 2018 dn_delete_queue(q, DN_DESTROY); 2019 return DNHT_SCAN_DEL; /* queue is deleted */ 2020 } 2021 2022 return 0; /* queue isn't deleted */ 2023 } 2024 2025 /* Callback called on flowset used to check if it has queues */ 2026 static int 2027 drain_queue_fs_cb(void *_fs, void *arg) 2028 { 2029 struct dn_fsk *fs = _fs; 2030 2031 if (fs->fs.flags & DN_QHT_HASH) { 2032 /* Flowset has a hash table for queues */ 2033 dn_ht_scan_bucket(fs->qht, &fs->drain_bucket, 2034 drain_queue_cb, NULL); 2035 fs->drain_bucket++; 2036 } else { 2037 /* No hash table for this flowset, null the pointer 2038 * if the queue is deleted 2039 */ 2040 if (fs->qht) { 2041 if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL) 2042 fs->qht = NULL; 2043 } 2044 } 2045 return 0; 2046 } 2047 2048 /* Called every tick, try to delete a 'bucket' of queue */ 2049 void 2050 dn_drain_queue(void) 2051 { 2052 /* scan a bucket of flowset */ 2053 dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs, 2054 drain_queue_fs_cb, NULL); 2055 dn_cfg.drain_fs++; 2056 } 2057 2058 /* 2059 * Handler for the various dummynet socket options 2060 */ 2061 static int 2062 ip_dn_ctl(struct sockopt *sopt) 2063 { 2064 void *p = NULL; 2065 int error, l; 2066 2067 error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET); 2068 if (error) 2069 return (error); 2070 2071 /* Disallow sets in really-really secure mode. */ 2072 if (sopt->sopt_dir == SOPT_SET) { 2073 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); 2074 if (error) 2075 return (error); 2076 } 2077 2078 switch (sopt->sopt_name) { 2079 default : 2080 D("dummynet: unknown option %d", sopt->sopt_name); 2081 error = EINVAL; 2082 break; 2083 2084 case IP_DUMMYNET_FLUSH: 2085 case IP_DUMMYNET_CONFIGURE: 2086 case IP_DUMMYNET_DEL: /* remove a pipe or queue */ 2087 case IP_DUMMYNET_GET: 2088 D("dummynet: compat option %d", sopt->sopt_name); 2089 error = ip_dummynet_compat(sopt); 2090 break; 2091 2092 case IP_DUMMYNET3 : 2093 if (sopt->sopt_dir == SOPT_GET) { 2094 error = dummynet_get(sopt, NULL); 2095 break; 2096 } 2097 l = sopt->sopt_valsize; 2098 if (l < sizeof(struct dn_id) || l > 12000) { 2099 D("argument len %d invalid", l); 2100 break; 2101 } 2102 p = malloc(l, M_TEMP, M_WAITOK); // XXX can it fail ? 2103 error = sooptcopyin(sopt, p, l, l); 2104 if (error) 2105 break ; 2106 error = do_config(p, l); 2107 break; 2108 } 2109 2110 if (p != NULL) 2111 free(p, M_TEMP); 2112 2113 return error ; 2114 } 2115 2116 2117 static void 2118 ip_dn_init(void) 2119 { 2120 if (dn_cfg.init_done) 2121 return; 2122 printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet); 2123 dn_cfg.init_done = 1; 2124 /* Set defaults here. MSVC does not accept initializers, 2125 * and this is also useful for vimages 2126 */ 2127 /* queue limits */ 2128 dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */ 2129 dn_cfg.byte_limit = 1024 * 1024; 2130 dn_cfg.expire = 1; 2131 2132 /* RED parameters */ 2133 dn_cfg.red_lookup_depth = 256; /* default lookup table depth */ 2134 dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */ 2135 dn_cfg.red_max_pkt_size = 1500; /* default max packet size */ 2136 2137 /* hash tables */ 2138 dn_cfg.max_hash_size = 65536; /* max in the hash tables */ 2139 dn_cfg.hash_size = 64; /* default hash size */ 2140 2141 /* create hash tables for schedulers and flowsets. 2142 * In both we search by key and by pointer. 2143 */ 2144 dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size, 2145 offsetof(struct dn_schk, schk_next), 2146 schk_hash, schk_match, schk_new); 2147 dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size, 2148 offsetof(struct dn_fsk, fsk_next), 2149 fsk_hash, fsk_match, fsk_new); 2150 2151 /* bucket index to drain object */ 2152 dn_cfg.drain_fs = 0; 2153 dn_cfg.drain_sch = 0; 2154 2155 heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id)); 2156 SLIST_INIT(&dn_cfg.fsu); 2157 SLIST_INIT(&dn_cfg.schedlist); 2158 2159 DN_LOCK_INIT(); 2160 2161 TASK_INIT(&dn_task, 0, dummynet_task, curvnet); 2162 dn_tq = taskqueue_create("dummynet", M_WAITOK, 2163 taskqueue_thread_enqueue, &dn_tq); 2164 taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); 2165 2166 callout_init(&dn_timeout, CALLOUT_MPSAFE); 2167 callout_reset(&dn_timeout, 1, dummynet, NULL); 2168 2169 /* Initialize curr_time adjustment mechanics. */ 2170 getmicrouptime(&dn_cfg.prev_t); 2171 } 2172 2173 #ifdef KLD_MODULE 2174 static void 2175 ip_dn_destroy(int last) 2176 { 2177 callout_drain(&dn_timeout); 2178 2179 DN_BH_WLOCK(); 2180 if (last) { 2181 ND("removing last instance\n"); 2182 ip_dn_ctl_ptr = NULL; 2183 ip_dn_io_ptr = NULL; 2184 } 2185 2186 dummynet_flush(); 2187 DN_BH_WUNLOCK(); 2188 taskqueue_drain(dn_tq, &dn_task); 2189 taskqueue_free(dn_tq); 2190 2191 dn_ht_free(dn_cfg.schedhash, 0); 2192 dn_ht_free(dn_cfg.fshash, 0); 2193 heap_free(&dn_cfg.evheap); 2194 2195 DN_LOCK_DESTROY(); 2196 } 2197 #endif /* KLD_MODULE */ 2198 2199 static int 2200 dummynet_modevent(module_t mod, int type, void *data) 2201 { 2202 2203 if (type == MOD_LOAD) { 2204 if (ip_dn_io_ptr) { 2205 printf("DUMMYNET already loaded\n"); 2206 return EEXIST ; 2207 } 2208 ip_dn_init(); 2209 ip_dn_ctl_ptr = ip_dn_ctl; 2210 ip_dn_io_ptr = dummynet_io; 2211 return 0; 2212 } else if (type == MOD_UNLOAD) { 2213 #if !defined(KLD_MODULE) 2214 printf("dummynet statically compiled, cannot unload\n"); 2215 return EINVAL ; 2216 #else 2217 ip_dn_destroy(1 /* last */); 2218 return 0; 2219 #endif 2220 } else 2221 return EOPNOTSUPP; 2222 } 2223 2224 /* modevent helpers for the modules */ 2225 static int 2226 load_dn_sched(struct dn_alg *d) 2227 { 2228 struct dn_alg *s; 2229 2230 if (d == NULL) 2231 return 1; /* error */ 2232 ip_dn_init(); /* just in case, we need the lock */ 2233 2234 /* Check that mandatory funcs exists */ 2235 if (d->enqueue == NULL || d->dequeue == NULL) { 2236 D("missing enqueue or dequeue for %s", d->name); 2237 return 1; 2238 } 2239 2240 /* Search if scheduler already exists */ 2241 DN_BH_WLOCK(); 2242 SLIST_FOREACH(s, &dn_cfg.schedlist, next) { 2243 if (strcmp(s->name, d->name) == 0) { 2244 D("%s already loaded", d->name); 2245 break; /* scheduler already exists */ 2246 } 2247 } 2248 if (s == NULL) 2249 SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next); 2250 DN_BH_WUNLOCK(); 2251 D("dn_sched %s %sloaded", d->name, s ? "not ":""); 2252 return s ? 1 : 0; 2253 } 2254 2255 static int 2256 unload_dn_sched(struct dn_alg *s) 2257 { 2258 struct dn_alg *tmp, *r; 2259 int err = EINVAL; 2260 2261 ND("called for %s", s->name); 2262 2263 DN_BH_WLOCK(); 2264 SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) { 2265 if (strcmp(s->name, r->name) != 0) 2266 continue; 2267 ND("ref_count = %d", r->ref_count); 2268 err = (r->ref_count != 0) ? EBUSY : 0; 2269 if (err == 0) 2270 SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next); 2271 break; 2272 } 2273 DN_BH_WUNLOCK(); 2274 D("dn_sched %s %sunloaded", s->name, err ? "not ":""); 2275 return err; 2276 } 2277 2278 int 2279 dn_sched_modevent(module_t mod, int cmd, void *arg) 2280 { 2281 struct dn_alg *sch = arg; 2282 2283 if (cmd == MOD_LOAD) 2284 return load_dn_sched(sch); 2285 else if (cmd == MOD_UNLOAD) 2286 return unload_dn_sched(sch); 2287 else 2288 return EINVAL; 2289 } 2290 2291 static moduledata_t dummynet_mod = { 2292 "dummynet", dummynet_modevent, NULL 2293 }; 2294 2295 #define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN 2296 #define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ 2297 DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); 2298 MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); 2299 MODULE_VERSION(dummynet, 3); 2300 2301 /* 2302 * Starting up. Done in order after dummynet_modevent() has been called. 2303 * VNET_SYSINIT is also called for each existing vnet and each new vnet. 2304 */ 2305 //VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL); 2306 2307 /* 2308 * Shutdown handlers up shop. These are done in REVERSE ORDER, but still 2309 * after dummynet_modevent() has been called. Not called on reboot. 2310 * VNET_SYSUNINIT is also called for each exiting vnet as it exits. 2311 * or when the module is unloaded. 2312 */ 2313 //VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL); 2314 2315 /* end of file */ 2316