/*-
 * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
 * Portions Copyright (c) 2000 Akamba Corp.
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Configuration and internal object management for dummynet.
 */

#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/taskqueue.h>
#include <net/if.h>	/* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
#include <netinet/in.h>
#include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>

#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>
#include <netpfil/ipfw/dn_sched.h>

/* which objects to copy */
#define DN_C_LINK	0x01
#define DN_C_SCH	0x02
#define DN_C_FLOW	0x04
#define DN_C_FS		0x08
#define DN_C_QUEUE	0x10

/* we use this argument in case of a schk_new */
struct schk_new_arg {
	struct dn_alg *fp;
	struct dn_sch *sch;
};

/*---- callout hooks. ----*/
static struct callout dn_timeout;
static struct task dn_task;
static struct taskqueue *dn_tq = NULL;

static void
dummynet(void *arg)
{

	(void)arg;	/* UNUSED */
	taskqueue_enqueue_fast(dn_tq, &dn_task);
}

void
dn_reschedule(void)
{

	callout_reset_sbt(&dn_timeout, tick_sbt, 0, dummynet, NULL,
	    C_HARDCLOCK | C_DIRECT_EXEC);
}
/*----- end of callout hooks -----*/
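/*
 * A note on the hooks above: the per-tick callout only enqueues
 * dn_task, so the actual tick work runs from dummynet_task() in
 * taskqueue context (see ip_dn_io.c), which presumably re-arms the
 * timer through dn_reschedule() when it is done.
 */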
/* Return a scheduler descriptor given the type or name. */
static struct dn_alg *
find_sched_type(int type, char *name)
{
	struct dn_alg *d;

	SLIST_FOREACH(d, &dn_cfg.schedlist, next) {
		if (d->type == type || (name && !strcasecmp(d->name, name)))
			return d;
	}
	return NULL; /* not found */
}

int
ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg)
{
	int oldv = *v;
	const char *op = NULL;

	if (dflt < lo)
		dflt = lo;
	if (dflt > hi)
		dflt = hi;
	if (oldv < lo) {
		*v = dflt;
		op = "Bump";
	} else if (oldv > hi) {
		*v = hi;
		op = "Clamp";
	} else
		return *v;
	if (op && msg)
		printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
	return *v;
}

/*---- flow_id mask, hash and compare functions ---*/
/*
 * The flow_id includes the 5-tuple, the queue/pipe number
 * which we store in the extra area in host order,
 * and for ipv6 also the flow_id6.
 * XXX see if we want the tos byte (can store in 'flags')
 */
static struct ipfw_flow_id *
flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
{
	int is_v6 = IS_IP6_FLOW_ID(id);

	id->dst_port &= mask->dst_port;
	id->src_port &= mask->src_port;
	id->proto &= mask->proto;
	id->extra &= mask->extra;
	if (is_v6) {
		APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
		APPLY_MASK(&id->src_ip6, &mask->src_ip6);
		id->flow_id6 &= mask->flow_id6;
	} else {
		id->dst_ip &= mask->dst_ip;
		id->src_ip &= mask->src_ip;
	}
	return id;
}

/* computes an OR of two masks, result in dst and also returned */
static struct ipfw_flow_id *
flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst)
{
	int is_v6 = IS_IP6_FLOW_ID(dst);

	dst->dst_port |= src->dst_port;
	dst->src_port |= src->src_port;
	dst->proto |= src->proto;
	dst->extra |= src->extra;
	if (is_v6) {
#define OR_MASK(_d, _s)                          \
	(_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \
	(_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \
	(_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \
	(_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3];
		OR_MASK(&dst->dst_ip6, &src->dst_ip6);
		OR_MASK(&dst->src_ip6, &src->src_ip6);
#undef OR_MASK
		dst->flow_id6 |= src->flow_id6;
	} else {
		dst->dst_ip |= src->dst_ip;
		dst->src_ip |= src->src_ip;
	}
	return dst;
}

static int
nonzero_mask(struct ipfw_flow_id *m)
{
	if (m->dst_port || m->src_port || m->proto || m->extra)
		return 1;
	if (IS_IP6_FLOW_ID(m)) {
		return
			m->dst_ip6.__u6_addr.__u6_addr32[0] ||
			m->dst_ip6.__u6_addr.__u6_addr32[1] ||
			m->dst_ip6.__u6_addr.__u6_addr32[2] ||
			m->dst_ip6.__u6_addr.__u6_addr32[3] ||
			m->src_ip6.__u6_addr.__u6_addr32[0] ||
			m->src_ip6.__u6_addr.__u6_addr32[1] ||
			m->src_ip6.__u6_addr.__u6_addr32[2] ||
			m->src_ip6.__u6_addr.__u6_addr32[3] ||
			m->flow_id6;
	} else {
		return m->dst_ip || m->src_ip;
	}
}
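/*
 * Example use of the helpers above (hypothetical values): a flowset
 * mask keeping only the IPv4 source, OR-ed with a scheduler mask that
 * keeps only the destination port, yields a combined mask matching on
 * both fields -- this is how fsk_attach() below builds fsk_mask:
 *
 *	struct ipfw_flow_id fsm = { .src_ip = 0xffffffff };
 *	struct ipfw_flow_id scm = { .dst_port = 0xffff };
 *	flow_id_or(&scm, &fsm);	// fsm now selects src_ip + dst_port
 */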
/* XXX we may want a better hash function */
static uint32_t
flow_id_hash(struct ipfw_flow_id *id)
{
	uint32_t i;

	if (IS_IP6_FLOW_ID(id)) {
		uint32_t *d = (uint32_t *)&id->dst_ip6;
		uint32_t *s = (uint32_t *)&id->src_ip6;
		i = (d[0]      ) ^ (d[1])       ^
		    (d[2]      ) ^ (d[3])       ^
		    (d[0] >> 15) ^ (d[1] >> 15) ^
		    (d[2] >> 15) ^ (d[3] >> 15) ^
		    (s[0] <<  1) ^ (s[1] <<  1) ^
		    (s[2] <<  1) ^ (s[3] <<  1) ^
		    (s[0] << 16) ^ (s[1] << 16) ^
		    (s[2] << 16) ^ (s[3] << 16) ^
		    (id->dst_port << 1) ^ (id->src_port) ^
		    (id->extra) ^
		    (id->proto ) ^ (id->flow_id6);
	} else {
		i = (id->dst_ip)        ^ (id->dst_ip >> 15) ^
		    (id->src_ip << 1)   ^ (id->src_ip >> 16) ^
		    (id->extra) ^
		    (id->dst_port << 1) ^ (id->src_port) ^ (id->proto);
	}
	return i;
}

/* Like bcmp, returns 0 if ids match, 1 otherwise. */
static int
flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2)
{
	int is_v6 = IS_IP6_FLOW_ID(id1);

	if (!is_v6) {
		if (IS_IP6_FLOW_ID(id2))
			return 1; /* different address families */

		return (id1->dst_ip == id2->dst_ip &&
		    id1->src_ip == id2->src_ip &&
		    id1->dst_port == id2->dst_port &&
		    id1->src_port == id2->src_port &&
		    id1->proto == id2->proto &&
		    id1->extra == id2->extra) ? 0 : 1;
	}
	/* the ipv6 case */
	return (
	    !bcmp(&id1->dst_ip6, &id2->dst_ip6, sizeof(id1->dst_ip6)) &&
	    !bcmp(&id1->src_ip6, &id2->src_ip6, sizeof(id1->src_ip6)) &&
	    id1->dst_port == id2->dst_port &&
	    id1->src_port == id2->src_port &&
	    id1->proto == id2->proto &&
	    id1->extra == id2->extra &&
	    id1->flow_id6 == id2->flow_id6) ? 0 : 1;
}
/*--------- end of flow-id mask, hash and compare ---------*/

/*--- support functions for the qht hashtable ----
 * Entries are hashed by flow-id
 */
static uint32_t
q_hash(uintptr_t key, int flags, void *arg)
{
	/* compute the hash slot from the flow id */
	struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
		&((struct dn_queue *)key)->ni.fid :
		(struct ipfw_flow_id *)key;

	return flow_id_hash(id);
}

static int
q_match(void *obj, uintptr_t key, int flags, void *arg)
{
	struct dn_queue *o = (struct dn_queue *)obj;
	struct ipfw_flow_id *id2;

	if (flags & DNHT_KEY_IS_OBJ) {
		/* compare pointers */
		id2 = &((struct dn_queue *)key)->ni.fid;
	} else {
		id2 = (struct ipfw_flow_id *)key;
	}
	return (0 == flow_id_cmp(&o->ni.fid, id2));
}

/*
 * create a new queue instance for the given 'key'.
 */
static void *
q_new(uintptr_t key, int flags, void *arg)
{
	struct dn_queue *q, *template = arg;
	struct dn_fsk *fs = template->fs;
	int size = sizeof(*q) + fs->sched->fp->q_datalen;

	q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO);
	if (q == NULL) {
		D("no memory for new queue");
		return NULL;
	}

	set_oid(&q->ni.oid, DN_QUEUE, size);
	if (fs->fs.flags & DN_QHT_HASH)
		q->ni.fid = *(struct ipfw_flow_id *)key;
	q->fs = fs;
	q->_si = template->_si;
	q->_si->q_count++;

	if (fs->sched->fp->new_queue)
		fs->sched->fp->new_queue(q);
	dn_cfg.queue_count++;
	return q;
}

/*
 * Notify schedulers that a queue is going away.
 * If (flags & DN_DESTROY), also free the packets.
 * The version for callbacks is called q_delete_cb().
 */
static void
dn_delete_queue(struct dn_queue *q, int flags)
{
	struct dn_fsk *fs = q->fs;

	// D("fs %p si %p\n", fs, q->_si);
	/* notify the parent scheduler that the queue is going away */
	if (fs && fs->sched->fp->free_queue)
		fs->sched->fp->free_queue(q);
	q->_si->q_count--;
	q->_si = NULL;
	if (flags & DN_DESTROY) {
		if (q->mq.head)
			dn_free_pkts(q->mq.head);
		bzero(q, sizeof(*q));	// safety
		free(q, M_DUMMYNET);
		dn_cfg.queue_count--;
	}
}

static int
q_delete_cb(void *q, void *arg)
{
	int flags = (int)(uintptr_t)arg;

	dn_delete_queue(q, flags);
	return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0;
}

/*
 * calls dn_delete_queue/q_delete_cb on all queues,
 * which notifies the parent scheduler and possibly drains packets.
 * flags & DN_DESTROY: drains queues and destroy qht;
 */
static void
qht_delete(struct dn_fsk *fs, int flags)
{
	ND("fs %d start flags %d qht %p",
		fs->fs.fs_nr, flags, fs->qht);
	if (!fs->qht)
		return;
	if (fs->fs.flags & DN_QHT_HASH) {
		dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags);
		if (flags & DN_DESTROY) {
			dn_ht_free(fs->qht, 0);
			fs->qht = NULL;
		}
	} else {
		dn_delete_queue((struct dn_queue *)(fs->qht), flags);
		if (flags & DN_DESTROY)
			fs->qht = NULL;
	}
}

/*
 * Find and possibly create the queue for a MULTIQUEUE scheduler.
 * We never call it for !MULTIQUEUE (the queue is in the sch_inst).
 */
struct dn_queue *
ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si,
	struct ipfw_flow_id *id)
{
	struct dn_queue template;

	template._si = si;
	template.fs = fs;

	if (fs->fs.flags & DN_QHT_HASH) {
		struct ipfw_flow_id masked_id;
		if (fs->qht == NULL) {
			fs->qht = dn_ht_init(NULL, fs->fs.buckets,
				offsetof(struct dn_queue, q_next),
				q_hash, q_match, q_new);
			if (fs->qht == NULL)
				return NULL;
		}
		masked_id = *id;
		flow_id_mask(&fs->fsk_mask, &masked_id);
		return dn_ht_find(fs->qht, (uintptr_t)&masked_id,
			DNHT_INSERT, &template);
	} else {
		if (fs->qht == NULL)
			fs->qht = q_new(0, 0, &template);
		return (struct dn_queue *)fs->qht;
	}
}
/*--- end of queue hash table ---*/

/*--- support functions for the sch_inst hashtable ----
 *
 * These are hashed by flow-id
 */
static uint32_t
si_hash(uintptr_t key, int flags, void *arg)
{
	/* compute the hash slot from the flow id */
	struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
		&((struct dn_sch_inst *)key)->ni.fid :
		(struct ipfw_flow_id *)key;

	return flow_id_hash(id);
}

static int
si_match(void *obj, uintptr_t key, int flags, void *arg)
{
	struct dn_sch_inst *o = obj;
	struct ipfw_flow_id *id2;

	id2 = (flags & DNHT_KEY_IS_OBJ) ?
		&((struct dn_sch_inst *)key)->ni.fid :
		(struct ipfw_flow_id *)key;
	return flow_id_cmp(&o->ni.fid, id2) == 0;
}
/*
 * create a new instance for the given 'key'
 * Allocate memory for instance, delay line and scheduler private data.
 */
static void *
si_new(uintptr_t key, int flags, void *arg)
{
	struct dn_schk *s = arg;
	struct dn_sch_inst *si;
	int l = sizeof(*si) + s->fp->si_datalen;

	si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
	if (si == NULL)
		goto error;

	/* Set length only for the part passed up to userland. */
	set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow));
	set_oid(&(si->dline.oid), DN_DELAY_LINE,
		sizeof(struct delay_line));
	/* mark si and dline as outside the event queue */
	si->ni.oid.id = si->dline.oid.id = -1;

	si->sched = s;
	si->dline.si = si;

	if (s->fp->new_sched && s->fp->new_sched(si)) {
		D("new_sched error");
		goto error;
	}
	if (s->sch.flags & DN_HAVE_MASK)
		si->ni.fid = *(struct ipfw_flow_id *)key;

	dn_cfg.si_count++;
	return si;

error:
	if (si) {
		bzero(si, sizeof(*si));	// safety
		free(si, M_DUMMYNET);
	}
	return NULL;
}

/*
 * Callback from siht to delete all scheduler instances. Remove
 * si and delay line from the system heap, destroy all queues.
 * We assume that all flowsets have been notified and do not
 * point to us anymore.
 */
static int
si_destroy(void *_si, void *arg)
{
	struct dn_sch_inst *si = _si;
	struct dn_schk *s = si->sched;
	struct delay_line *dl = &si->dline;

	if (dl->oid.subtype)	/* remove delay line from event heap */
		heap_extract(&dn_cfg.evheap, dl);
	dn_free_pkts(dl->mq.head);	/* drain delay line */
	if (si->kflags & DN_ACTIVE)	/* remove si from event heap */
		heap_extract(&dn_cfg.evheap, si);
	if (s->fp->free_sched)
		s->fp->free_sched(si);
	bzero(si, sizeof(*si));	/* safety */
	free(si, M_DUMMYNET);
	dn_cfg.si_count--;
	return DNHT_SCAN_DEL;
}

/*
 * Find the scheduler instance for this packet. If we need to apply
 * a mask, do it on a local copy of the flow_id to preserve the original.
 * Assume siht is always initialized if we have a mask.
 */
struct dn_sch_inst *
ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id)
{

	if (s->sch.flags & DN_HAVE_MASK) {
		struct ipfw_flow_id id_t = *id;
		flow_id_mask(&s->sch.sched_mask, &id_t);
		return dn_ht_find(s->siht, (uintptr_t)&id_t,
			DNHT_INSERT, s);
	}
	if (!s->siht)
		s->siht = si_new(0, 0, s);
	return (struct dn_sch_inst *)s->siht;
}

/* callback to flush credit for the scheduler instance */
static int
si_reset_credit(void *_si, void *arg)
{
	struct dn_sch_inst *si = _si;
	struct dn_link *p = &si->sched->link;

	si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0);
	return 0;
}

static void
schk_reset_credit(struct dn_schk *s)
{
	if (s->sch.flags & DN_HAVE_MASK)
		dn_ht_scan(s->siht, si_reset_credit, NULL);
	else if (s->siht)
		si_reset_credit(s->siht, NULL);
}
/*---- end of sch_inst hashtable ---------------------*/
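/*
 * The flowset and scheduler hashes below use the cheap integer mix
 * (i>>8)^(i>>4)^i, which is adequate since fs_nr and sched_nr are
 * small, mostly sequential identifiers.  Worked example: i = 0x123
 * gives 0x1 ^ 0x12 ^ 0x123 = 0x130, before the table applies its own
 * bucket modulo.
 */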
/*-------------------------------------------------------
 * flowset hash (fshash) support. Entries are hashed by fs_nr.
 * New allocations are put in the fsunlinked list, from which
 * they are removed when they point to a specific scheduler.
 */
static uint32_t
fsk_hash(uintptr_t key, int flags, void *arg)
{
	uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
		((struct dn_fsk *)key)->fs.fs_nr;

	return ( (i>>8)^(i>>4)^i );
}

static int
fsk_match(void *obj, uintptr_t key, int flags, void *arg)
{
	struct dn_fsk *fs = obj;
	int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
		((struct dn_fsk *)key)->fs.fs_nr;

	return (fs->fs.fs_nr == i);
}

static void *
fsk_new(uintptr_t key, int flags, void *arg)
{
	struct dn_fsk *fs;

	fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO);
	if (fs) {
		set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs));
		dn_cfg.fsk_count++;
		fs->drain_bucket = 0;
		SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
	}
	return fs;
}

/*
 * detach flowset from its current scheduler. Flags as follows:
 *   DN_DETACH removes from the fsk_list
 *   DN_DESTROY deletes individual queues
 *   DN_DELETE_FS destroys the flowset (otherwise it goes in unlinked).
 */
static void
fsk_detach(struct dn_fsk *fs, int flags)
{
	if (flags & DN_DELETE_FS)
		flags |= DN_DESTROY;
	ND("fs %d from sched %d flags %s %s %s",
		fs->fs.fs_nr, fs->fs.sched_nr,
		(flags & DN_DELETE_FS) ? "DEL_FS":"",
		(flags & DN_DESTROY) ? "DEL":"",
		(flags & DN_DETACH) ? "DET":"");
	if (flags & DN_DETACH) { /* detach from the list */
		struct dn_fsk_head *h;
		h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu;
		SLIST_REMOVE(h, fs, dn_fsk, sch_chain);
	}
	/* Free the RED parameters, they will be recomputed on
	 * subsequent attach if needed.
	 */
	if (fs->w_q_lookup)
		free(fs->w_q_lookup, M_DUMMYNET);
	fs->w_q_lookup = NULL;
	qht_delete(fs, flags);
	if (fs->sched && fs->sched->fp->free_fsk)
		fs->sched->fp->free_fsk(fs);
	fs->sched = NULL;
	if (flags & DN_DELETE_FS) {
		bzero(fs, sizeof(*fs));	/* safety */
		free(fs, M_DUMMYNET);
		dn_cfg.fsk_count--;
	} else {
		SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
	}
}

/*
 * Detach or destroy all flowsets in a list.
 * flags specifies what to do:
 *   DN_DESTROY:   flush all queues
 *   DN_DELETE_FS: DN_DESTROY + destroy flowset
 * DN_DELETE_FS implies DN_DESTROY
 */
static void
fsk_detach_list(struct dn_fsk_head *h, int flags)
{
	struct dn_fsk *fs;
	int n = 0;	/* only for stats */

	ND("head %p flags %x", h, flags);
	while ((fs = SLIST_FIRST(h))) {
		SLIST_REMOVE_HEAD(h, sch_chain);
		n++;
		fsk_detach(fs, flags);
	}
	ND("done %d flowsets", n);
}

/*
 * called on 'queue X delete' -- removes the flowset from fshash,
 * deletes all queues for the flowset, and removes the flowset.
 */
static int
delete_fs(int i, int locked)
{
	struct dn_fsk *fs;
	int err = 0;

	if (!locked)
		DN_BH_WLOCK();
	fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL);
	ND("fs %d found %p", i, fs);
	if (fs) {
		fsk_detach(fs, DN_DETACH | DN_DELETE_FS);
		err = 0;
	} else
		err = EINVAL;
	if (!locked)
		DN_BH_WUNLOCK();
	return err;
}

/*----- end of flowset hashtable support -------------*/

/*------------------------------------------------------------
 * Scheduler hash. When searching by index we pass sched_nr,
 * otherwise we pass struct dn_sch * which is the first field in
 * struct dn_schk so we can cast between the two. We rely on this
 * trick in the create phase (but it should be fixed).
 */
static uint32_t
schk_hash(uintptr_t key, int flags, void *_arg)
{
	uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
		((struct dn_schk *)key)->sch.sched_nr;

	return ( (i>>8)^(i>>4)^i );
}

static int
schk_match(void *obj, uintptr_t key, int flags, void *_arg)
{
	struct dn_schk *s = (struct dn_schk *)obj;
	int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
		((struct dn_schk *)key)->sch.sched_nr;

	return (s->sch.sched_nr == i);
}

/*
 * Create the entry and initialize it with the sched hash if needed.
 * Leave s->fp unset so we can tell whether a dn_ht_find() returns
 * a new object or a previously existing one.
 */
static void *
schk_new(uintptr_t key, int flags, void *arg)
{
	struct schk_new_arg *a = arg;
	struct dn_schk *s;
	int l = sizeof(*s) + a->fp->schk_datalen;

	s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
	if (s == NULL)
		return NULL;
	set_oid(&s->link.oid, DN_LINK, sizeof(s->link));
	s->sch = *a->sch;	// copy initial values
	s->link.link_nr = s->sch.sched_nr;
	SLIST_INIT(&s->fsk_list);
	/* initialize the hash table or create the single instance */
	s->fp = a->fp;	/* si_new needs this */
	s->drain_bucket = 0;
	if (s->sch.flags & DN_HAVE_MASK) {
		s->siht = dn_ht_init(NULL, s->sch.buckets,
			offsetof(struct dn_sch_inst, si_next),
			si_hash, si_match, si_new);
		if (s->siht == NULL) {
			free(s, M_DUMMYNET);
			return NULL;
		}
	}
	s->fp = NULL;	/* mark as a new scheduler */
	dn_cfg.schk_count++;
	return s;
}

/*
 * Callback for sched delete. Notify all attached flowsets to
 * detach from the scheduler, destroy the internal flowset, and
 * all instances. The scheduler goes away too.
 * arg is 0 (only detach flowsets and destroy instances)
 * DN_DESTROY (detach & delete queues, delete schk)
 * or DN_DELETE_FS (delete queues and flowsets, delete schk)
 */
static int
schk_delete_cb(void *obj, void *arg)
{
	struct dn_schk *s = obj;
#if 0
	int a = (int)arg;
	ND("sched %d arg %s%s",
		s->sch.sched_nr,
		a&DN_DESTROY ? "DEL ":"",
		a&DN_DELETE_FS ? "DEL_FS":"");
#endif
	fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0);
	/* no more flowsets pointing to us now */
	if (s->sch.flags & DN_HAVE_MASK) {
		dn_ht_scan(s->siht, si_destroy, NULL);
		dn_ht_free(s->siht, 0);
	} else if (s->siht)
		si_destroy(s->siht, NULL);
	if (s->profile) {
		free(s->profile, M_DUMMYNET);
		s->profile = NULL;
	}
	s->siht = NULL;
	if (s->fp->destroy)
		s->fp->destroy(s);
	bzero(s, sizeof(*s));	// safety
	free(obj, M_DUMMYNET);
	dn_cfg.schk_count--;
	return DNHT_SCAN_DEL;
}

/*
 * called on a 'sched X delete' command. Deletes a single scheduler.
 * This is done by removing from the schedhash, unlinking all
 * flowsets and deleting their traffic.
 */
static int
delete_schk(int i)
{
	struct dn_schk *s;

	s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
	ND("%d %p", i, s);
	if (!s)
		return EINVAL;
	delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */
	/* then detach flowsets, delete traffic */
	schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY);
	return 0;
}
/*--- end of schk hashtable support ---*/

static int
copy_obj(char **start, char *end, void *_o, const char *msg, int i)
{
	struct dn_id *o = _o;
	int have = end - *start;

	if (have < o->len || o->len == 0 || o->type == 0) {
		D("(WARN) type %d %s %d have %d need %d",
			o->type, msg, i, have, o->len);
		return 1;
	}
	ND("type %d %s %d len %d", o->type, msg, i, o->len);
	bcopy(_o, *start, o->len);
	if (o->type == DN_LINK) {
		/* Adjust burst parameter for link */
		struct dn_link *l = (struct dn_link *)*start;
		l->burst = div64(l->burst, 8 * hz);
		l->delay = l->delay * 1000 / hz;
	} else if (o->type == DN_SCH) {
		/* Set id->id to the number of instances */
		struct dn_schk *s = _o;
		struct dn_id *id = (struct dn_id *)(*start);
		id->id = (s->sch.flags & DN_HAVE_MASK) ?
			dn_ht_entries(s->siht) : (s->siht ? 1 : 0);
	}
	*start += o->len;
	return 0;
}

/* Specific function to copy a queue.
 * Copies only the user-visible part of a queue (which is in
 * a struct dn_flow), and sets len accordingly.
 */
static int
copy_obj_q(char **start, char *end, void *_o, const char *msg, int i)
{
	struct dn_id *o = _o;
	int have = end - *start;
	int len = sizeof(struct dn_flow); /* see above comment */

	if (have < len || o->len == 0 || o->type != DN_QUEUE) {
		D("ERROR type %d %s %d have %d need %d",
			o->type, msg, i, have, len);
		return 1;
	}
	ND("type %d %s %d len %d", o->type, msg, i, len);
	bcopy(_o, *start, len);
	((struct dn_id*)(*start))->len = len;
	*start += len;
	return 0;
}

static int
copy_q_cb(void *obj, void *arg)
{
	struct dn_queue *q = obj;
	struct copy_args *a = arg;
	struct dn_flow *ni = (struct dn_flow *)(*a->start);

	if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1))
		return DNHT_SCAN_END;
	ni->oid.type = DN_FLOW; /* override the DN_QUEUE */
	ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL);
	return 0;
}

static int
copy_q(struct copy_args *a, struct dn_fsk *fs, int flags)
{
	if (!fs->qht)
		return 0;
	if (fs->fs.flags & DN_QHT_HASH)
		dn_ht_scan(fs->qht, copy_q_cb, a);
	else
		copy_q_cb(fs->qht, a);
	return 0;
}
/*
 * This routine only copies the initial part of a profile ? XXX
 */
static int
copy_profile(struct copy_args *a, struct dn_profile *p)
{
	int have = a->end - *a->start;
	/* XXX here we check for max length */
	int profile_len = sizeof(struct dn_profile) -
		ED_MAX_SAMPLES_NO*sizeof(int);

	if (p == NULL)
		return 0;
	if (have < profile_len) {
		D("error have %d need %d", have, profile_len);
		return 1;
	}
	bcopy(p, *a->start, profile_len);
	((struct dn_id *)(*a->start))->len = profile_len;
	*a->start += profile_len;
	return 0;
}

static int
copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags)
{
	struct dn_fs *ufs = (struct dn_fs *)(*a->start);

	if (!fs)
		return 0;
	ND("flowset %d", fs->fs.fs_nr);
	if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr))
		return DNHT_SCAN_END;
	ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ?
		dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0);
	if (flags) {	/* copy queues */
		copy_q(a, fs, 0);
	}
	return 0;
}

static int
copy_si_cb(void *obj, void *arg)
{
	struct dn_sch_inst *si = obj;
	struct copy_args *a = arg;
	struct dn_flow *ni = (struct dn_flow *)(*a->start);

	if (copy_obj(a->start, a->end, &si->ni, "inst",
			si->sched->sch.sched_nr))
		return DNHT_SCAN_END;
	ni->oid.type = DN_FLOW; /* override the DN_SCH_I */
	ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL);
	return 0;
}

static int
copy_si(struct copy_args *a, struct dn_schk *s, int flags)
{
	if (s->sch.flags & DN_HAVE_MASK)
		dn_ht_scan(s->siht, copy_si_cb, a);
	else if (s->siht)
		copy_si_cb(s->siht, a);
	return 0;
}

/*
 * compute a list of children of a scheduler and copy up
 */
static int
copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags)
{
	struct dn_fsk *fs;
	struct dn_id *o;
	uint32_t *p;
	int n = 0, space = sizeof(*o);

	SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
		if (fs->fs.fs_nr < DN_MAX_ID)
			n++;
	}
	space += n * sizeof(uint32_t);
	DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n);
	if (a->end - *(a->start) < space)
		return DNHT_SCAN_END;
	o = (struct dn_id *)(*(a->start));
	o->len = space;
	*a->start += o->len;
	o->type = DN_TEXT;
	p = (uint32_t *)(o+1);
	SLIST_FOREACH(fs, &s->fsk_list, sch_chain)
		if (fs->fs.fs_nr < DN_MAX_ID)
			*p++ = fs->fs.fs_nr;
	return 0;
}
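/*
 * The ranges consumed by copy_data_helper() below come from the
 * request's struct copy_range: a->extra->r holds (lo, hi) pairs packed
 * back to back, and the loop 'for (; r < lim; r += 2)' treats each
 * pair as the inclusive interval r[0]..r[1].  E.g. a request for pipes
 * 3-5 and 10 would carry the four words { 3, 5, 10, 10 }.
 */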
static int
copy_data_helper(void *_o, void *_arg)
{
	struct copy_args *a = _arg;
	uint32_t *r = a->extra->r; /* start of first range */
	uint32_t *lim;	/* first invalid pointer */
	int n;

	lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len);

	if (a->type == DN_LINK || a->type == DN_SCH) {
		/* pipe|sched show, we receive a dn_schk */
		struct dn_schk *s = _o;

		n = s->sch.sched_nr;
		if (a->type == DN_SCH && n >= DN_MAX_ID)
			return 0;	/* not a scheduler */
		if (a->type == DN_LINK && n <= DN_MAX_ID)
			return 0;	/* not a pipe */

		/* see if the object is within one of our ranges */
		for (; r < lim; r += 2) {
			if (n < r[0] || n > r[1])
				continue;
			/* Found a valid entry, copy and we are done */
			if (a->flags & DN_C_LINK) {
				if (copy_obj(a->start, a->end,
				    &s->link, "link", n))
					return DNHT_SCAN_END;
				if (copy_profile(a, s->profile))
					return DNHT_SCAN_END;
				if (copy_flowset(a, s->fs, 0))
					return DNHT_SCAN_END;
			}
			if (a->flags & DN_C_SCH) {
				if (copy_obj(a->start, a->end,
				    &s->sch, "sched", n))
					return DNHT_SCAN_END;
				/* list all attached flowsets */
				if (copy_fsk_list(a, s, 0))
					return DNHT_SCAN_END;
			}
			if (a->flags & DN_C_FLOW)
				copy_si(a, s, 0);
			break;
		}
	} else if (a->type == DN_FS) {
		/* queue show, skip internal flowsets */
		struct dn_fsk *fs = _o;

		n = fs->fs.fs_nr;
		if (n >= DN_MAX_ID)
			return 0;
		/* see if the object is within one of our ranges */
		for (; r < lim; r += 2) {
			if (n < r[0] || n > r[1])
				continue;
			if (copy_flowset(a, fs, 0))
				return DNHT_SCAN_END;
			copy_q(a, fs, 0);
			break; /* we are done */
		}
	}
	return 0;
}

static inline struct dn_schk *
locate_scheduler(int i)
{
	return dn_ht_find(dn_cfg.schedhash, i, 0, NULL);
}
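/*
 * Sketch of the fixed-point convention used by config_red() below
 * (see SCALE() and SCALE_MUL() in ip_dn_private.h for the actual
 * definitions): a weight such as w_q = 0.002 arrives from userland
 * already multiplied by SCALE(1), so the product of two scaled values
 * must go through SCALE_MUL() to shed the extra scale factor, e.g.
 *
 *	avg = SCALE_MUL(avg, SCALE(1) - fs->w_q);	// avg *= (1 - w_q)
 */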
/*
 * red parameters are in fixed point arithmetic.
 */
static int
config_red(struct dn_fsk *fs)
{
	int64_t s, idle, weight, w0;
	int t, i;

	fs->w_q = fs->fs.w_q;
	fs->max_p = fs->fs.max_p;
	ND("called");
	/* Doing stuff that was in userland */
	i = fs->sched->link.bandwidth;
	s = (i <= 0) ? 0 :
		hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i;

	idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */
	fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth);
	/* fs->lookup_step not scaled, */
	if (!fs->lookup_step)
		fs->lookup_step = 1;
	w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled

	for (t = fs->lookup_step; t > 1; --t)
		weight = SCALE_MUL(weight, w0);
	fs->lookup_weight = (int)(weight); // scaled

	/* Now doing stuff that was in kerneland */
	fs->min_th = SCALE(fs->fs.min_th);
	fs->max_th = SCALE(fs->fs.max_th);

	fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
	fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));

	if (fs->fs.flags & DN_IS_GENTLE_RED) {
		fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th;
		fs->c_4 = SCALE(1) - 2 * fs->max_p;
	}

	/* If the lookup table already exists, free it and create it again. */
	if (fs->w_q_lookup) {
		free(fs->w_q_lookup, M_DUMMYNET);
		fs->w_q_lookup = NULL;
	}
	if (dn_cfg.red_lookup_depth == 0) {
		printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth"
		    " must be > 0\n");
		fs->fs.flags &= ~DN_IS_RED;
		fs->fs.flags &= ~DN_IS_GENTLE_RED;
		return (EINVAL);
	}
	fs->lookup_depth = dn_cfg.red_lookup_depth;
	fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int),
	    M_DUMMYNET, M_NOWAIT);
	if (fs->w_q_lookup == NULL) {
		printf("dummynet: sorry, cannot allocate red lookup table\n");
		fs->fs.flags &= ~DN_IS_RED;
		fs->fs.flags &= ~DN_IS_GENTLE_RED;
		return(ENOSPC);
	}

	/* Fill the lookup table with (1 - w_q)^x */
	fs->w_q_lookup[0] = SCALE(1) - fs->w_q;

	for (i = 1; i < fs->lookup_depth; i++)
		fs->w_q_lookup[i] =
		    SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight);

	if (dn_cfg.red_avg_pkt_size < 1)
		dn_cfg.red_avg_pkt_size = 512;
	fs->avg_pkt_size = dn_cfg.red_avg_pkt_size;
	if (dn_cfg.red_max_pkt_size < 1)
		dn_cfg.red_max_pkt_size = 1500;
	fs->max_pkt_size = dn_cfg.red_max_pkt_size;
	ND("exit");
	return 0;
}

/* Scan all flowsets attached to this scheduler and update red */
static void
update_red(struct dn_schk *s)
{
	struct dn_fsk *fs;

	SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
		if (fs && (fs->fs.flags & DN_IS_RED))
			config_red(fs);
	}
}

/* attach flowset to scheduler s, possibly requeue */
static void
fsk_attach(struct dn_fsk *fs, struct dn_schk *s)
{
	ND("remove fs %d from fsunlinked, link to sched %d",
		fs->fs.fs_nr, s->sch.sched_nr);
	SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain);
	fs->sched = s;
	SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain);
	if (s->fp->new_fsk)
		s->fp->new_fsk(fs);
	/* XXX compute fsk_mask */
	fs->fsk_mask = fs->fs.flow_mask;
	if (fs->sched->sch.flags & DN_HAVE_MASK)
		flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask);
	if (fs->qht) {
		/*
		 * we must drain qht according to the old
		 * type, and reinsert according to the new one.
		 * The requeue is complex -- in general we need to
		 * reclassify every single packet.
		 * For the time being, let's hope qht is never set
		 * when we reach this point.
		 */
		D("XXX TODO requeue from fs %d to sch %d",
			fs->fs.fs_nr, s->sch.sched_nr);
		fs->qht = NULL;
	}
	/* set the new type for qht */
	if (nonzero_mask(&fs->fsk_mask))
		fs->fs.flags |= DN_QHT_HASH;
	else
		fs->fs.flags &= ~DN_QHT_HASH;

	/* XXX config_red() can fail... */
	if (fs->fs.flags & DN_IS_RED)
		config_red(fs);
}

/* update all flowsets which may refer to this scheduler */
static void
update_fs(struct dn_schk *s)
{
	struct dn_fsk *fs, *tmp;

	SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) {
		if (s->sch.sched_nr != fs->fs.sched_nr) {
			D("fs %d for sch %d not %d still unlinked",
				fs->fs.fs_nr, fs->fs.sched_nr,
				s->sch.sched_nr);
			continue;
		}
		fsk_attach(fs, s);
	}
}
/*
 * Configuration -- to preserve backward compatibility we use
 * the following scheme (N is 65536)
 *	NUMBER		SCHED	LINK	FLOWSET
 *	   1 ..  N-1	(1)WFQ	(2)WFQ	(3)queue
 *	 N+1 .. 2N-1	(4)FIFO	(5)FIFO	(6)FIFO for sched 1..N-1
 *	2N+1 .. 3N-1	  --	  --	(7)FIFO for sched N+1..2N-1
 *
 * "pipe i config" configures #1, #2 and #3
 * "sched i config" configures #1 and possibly #6
 * "queue i config" configures #3
 * #1 is configured with 'pipe i config' or 'sched i config'
 * #2 is configured with 'pipe i config', and created if not
 *	existing with 'sched i config'
 * #3 is configured with 'queue i config'
 * #4 is automatically configured after #1, can only be FIFO
 * #5 is automatically configured after #2
 * #6 is automatically created when #1 is !MULTIQUEUE,
 *	and can be updated.
 * #7 is automatically configured after #2
 */

/*
 * configure a link (and its FIFO instance)
 */
static int
config_link(struct dn_link *p, struct dn_id *arg)
{
	int i;

	if (p->oid.len != sizeof(*p)) {
		D("invalid pipe len %d", p->oid.len);
		return EINVAL;
	}
	i = p->link_nr;
	if (i <= 0 || i >= DN_MAX_ID)
		return EINVAL;
	/*
	 * The config program passes parameters as follows:
	 * bw = bits/second (0 means no limits),
	 * delay = ms, must be translated into ticks.
	 * qsize = slots/bytes
	 * burst ???
	 */
	p->delay = (p->delay * hz) / 1000;
	/* Scale burst size: bytes -> bits * hz */
	p->burst *= 8 * hz;

	DN_BH_WLOCK();
	/* do it twice, base link and FIFO link */
	for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
		struct dn_schk *s = locate_scheduler(i);
		if (s == NULL) {
			DN_BH_WUNLOCK();
			D("sched %d not found", i);
			return EINVAL;
		}
		/* remove profile if it exists */
		if (s->profile) {
			free(s->profile, M_DUMMYNET);
			s->profile = NULL;
		}
		/* copy all parameters */
		s->link.oid = p->oid;
		s->link.link_nr = i;
		s->link.delay = p->delay;
		if (s->link.bandwidth != p->bandwidth) {
			/* XXX bandwidth changes, need to update red params */
			s->link.bandwidth = p->bandwidth;
			update_red(s);
		}
		s->link.burst = p->burst;
		schk_reset_credit(s);
	}
	dn_cfg.id++;
	DN_BH_WUNLOCK();
	return 0;
}
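/*
 * Worked example for the conversions in config_link() above, assuming
 * hz = 1000: a 25 ms delay becomes 25 * 1000 / 1000 = 25 ticks, and a
 * 10000-byte burst is stored as 10000 * 8 * hz bit-ticks.  copy_obj()
 * undoes both transformations (div64(burst, 8 * hz) and
 * delay * 1000 / hz) when exporting the link back to userland.
 */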
/*
 * configure a flowset. Can be called from inside with locked=1.
 */
static struct dn_fsk *
config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked)
{
	int i;
	struct dn_fsk *fs;

	if (nfs->oid.len != sizeof(*nfs)) {
		D("invalid flowset len %d", nfs->oid.len);
		return NULL;
	}
	i = nfs->fs_nr;
	if (i <= 0 || i >= 3*DN_MAX_ID)
		return NULL;
	ND("flowset %d", i);
	/* XXX other sanity checks */
	if (nfs->flags & DN_QSIZE_BYTES) {
		ipdn_bound_var(&nfs->qsize, 16384,
		    1500, dn_cfg.byte_limit, NULL); // "queue byte size");
	} else {
		ipdn_bound_var(&nfs->qsize, 50,
		    1, dn_cfg.slot_limit, NULL); // "queue slot size");
	}
	if (nfs->flags & DN_HAVE_MASK) {
		/* make sure we have some buckets */
		ipdn_bound_var((int *)&nfs->buckets, dn_cfg.hash_size,
			1, dn_cfg.max_hash_size, "flowset buckets");
	} else {
		nfs->buckets = 1;	/* we only need 1 */
	}
	if (!locked)
		DN_BH_WLOCK();
	do { /* exit with break when done */
		struct dn_schk *s;
		int flags = nfs->sched_nr ? DNHT_INSERT : 0;
		int j;
		int oldc = dn_cfg.fsk_count;

		fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL);
		if (fs == NULL) {
			D("missing sched for flowset %d", i);
			break;
		}
		/* grab some defaults from the existing one */
		if (nfs->sched_nr == 0) /* reuse */
			nfs->sched_nr = fs->fs.sched_nr;
		for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) {
			if (nfs->par[j] == -1) /* reuse */
				nfs->par[j] = fs->fs.par[j];
		}
		if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) {
			ND("flowset %d unchanged", i);
			break; /* no change, nothing to do */
		}
		if (oldc != dn_cfg.fsk_count)	/* new item */
			dn_cfg.id++;
		s = locate_scheduler(nfs->sched_nr);
		/* detach from old scheduler if needed, preserving
		 * queues if we need to reattach. Then update the
		 * configuration, and possibly attach to the new sched.
		 */
		DX(2, "fs %d changed sched %d@%p to %d@%p",
			fs->fs.fs_nr,
			fs->fs.sched_nr, fs->sched, nfs->sched_nr, s);
		if (fs->sched) {
			int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY);
			flags |= DN_DESTROY; /* XXX temporary */
			fsk_detach(fs, flags);
		}
		fs->fs = *nfs; /* copy configuration */
		if (s != NULL)
			fsk_attach(fs, s);
	} while (0);
	if (!locked)
		DN_BH_WUNLOCK();
	return fs;
}
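/*
 * Note on config_fs() above: a request carrying sched_nr == 0 or
 * par[j] == -1 means "reuse the value from the existing flowset",
 * which presumably lets userland update a single parameter without
 * re-sending the whole configuration.
 */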
/*
 * config/reconfig a scheduler and its FIFO variant.
 * For !MULTIQUEUE schedulers, also set up the flowset.
 *
 * On reconfigurations (detected because s->fp is set),
 * detach existing flowsets preserving traffic, preserve link,
 * and delete the old scheduler creating a new one.
 */
static int
config_sched(struct dn_sch *_nsch, struct dn_id *arg)
{
	struct dn_schk *s;
	struct schk_new_arg a;	/* argument for schk_new */
	int i;
	struct dn_link p;	/* copy of old link */
	struct dn_profile *pf = NULL;	/* copy of old link profile */
	/* Used to preserve mask parameter */
	struct ipfw_flow_id new_mask;
	int new_buckets = 0;
	int new_flags = 0;
	int pipe_cmd;
	int err = ENOMEM;

	a.sch = _nsch;
	if (a.sch->oid.len != sizeof(*a.sch)) {
		D("bad sched len %d", a.sch->oid.len);
		return EINVAL;
	}
	i = a.sch->sched_nr;
	if (i <= 0 || i >= DN_MAX_ID)
		return EINVAL;
	/* make sure we have some buckets */
	if (a.sch->flags & DN_HAVE_MASK)
		ipdn_bound_var((int *)&a.sch->buckets, dn_cfg.hash_size,
			1, dn_cfg.max_hash_size, "sched buckets");
	/* XXX other sanity checks */
	bzero(&p, sizeof(p));

	pipe_cmd = a.sch->flags & DN_PIPE_CMD;
	a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if it is not set?
	if (pipe_cmd) {
		/* Copy mask parameter */
		new_mask = a.sch->sched_mask;
		new_buckets = a.sch->buckets;
		new_flags = a.sch->flags;
	}
	DN_BH_WLOCK();
again: /* run twice, for wfq and fifo */
	/*
	 * lookup the type. If not supplied, use the previous one
	 * or default to WF2Q+. Otherwise, return an error.
	 */
	dn_cfg.id++;
	a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name);
	if (a.fp != NULL) {
		/* found. Lookup or create entry */
		s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a);
	} else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) {
		/* No type. search existing s* or retry with WF2Q+ */
		s = dn_ht_find(dn_cfg.schedhash, i, 0, &a);
		if (s != NULL) {
			a.fp = s->fp;
			/* Scheduler exists, skip to FIFO scheduler
			 * if command was pipe config...
			 */
			if (pipe_cmd)
				goto next;
		} else {
			/* New scheduler, create a wf2q+ with no mask
			 * if command was pipe config...
			 */
			if (pipe_cmd) {
				/* clear mask parameter */
				bzero(&a.sch->sched_mask, sizeof(new_mask));
				a.sch->buckets = 0;
				a.sch->flags &= ~DN_HAVE_MASK;
			}
			a.sch->oid.subtype = DN_SCHED_WF2QP;
			goto again;
		}
	} else {
		D("invalid scheduler type %d %s",
			a.sch->oid.subtype, a.sch->name);
		err = EINVAL;
		goto error;
	}
	/* normalize name and subtype */
	a.sch->oid.subtype = a.fp->type;
	bzero(a.sch->name, sizeof(a.sch->name));
	strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name));
	if (s == NULL) {
		D("cannot allocate scheduler %d", i);
		goto error;
	}
	/* restore existing link if any */
	if (p.link_nr) {
		s->link = p;
		if (!pf || pf->link_nr != p.link_nr) { /* no saved value */
			s->profile = NULL; /* XXX maybe not needed */
		} else {
			s->profile = malloc(sizeof(struct dn_profile),
				M_DUMMYNET, M_NOWAIT | M_ZERO);
			if (s->profile == NULL) {
				D("cannot allocate profile");
				goto error; //XXX
			}
			bcopy(pf, s->profile, sizeof(*pf));
		}
	}
	p.link_nr = 0;
	if (s->fp == NULL) {
		DX(2, "sched %d new type %s", i, a.fp->name);
	} else if (s->fp != a.fp ||
			bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) {
		/* already existing. */
		DX(2, "sched %d type changed from %s to %s",
			i, s->fp->name, a.fp->name);
		DX(4, "   type/sub %d/%d -> %d/%d",
			s->sch.oid.type, s->sch.oid.subtype,
			a.sch->oid.type, a.sch->oid.subtype);
		if (s->link.link_nr == 0)
			D("XXX WARNING link 0 for sched %d", i);
		p = s->link;	/* preserve link */
		if (s->profile) {/* preserve profile */
			if (!pf)
				pf = malloc(sizeof(*pf),
				    M_DUMMYNET, M_NOWAIT | M_ZERO);
			if (pf)	/* XXX should issue a warning otherwise */
				bcopy(s->profile, pf, sizeof(*pf));
		}
		/* remove from the hash */
		dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
		/* Detach flowsets, preserve queues. */
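		/*
		 * Reconfiguration path: the old entry is about to be
		 * destroyed and we jump back to 'again' to create a
		 * fresh scheduler, restoring the preserved link (and
		 * profile, if any) saved in p and pf above.
		 */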
		// schk_delete_cb(s, NULL);
		// XXX temporarily, kill queues
		schk_delete_cb(s, (void *)DN_DESTROY);
		goto again;
	} else {
		DX(4, "sched %d unchanged type %s", i, a.fp->name);
	}
	/* complete initialization */
	s->sch = *a.sch;
	s->fp = a.fp;
	s->cfg = arg;
	// XXX schk_reset_credit(s);
	/* create the internal flowset if needed,
	 * trying to reuse existing ones if available
	 */
	if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) {
		s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL);
		if (!s->fs) {
			struct dn_fs fs;
			bzero(&fs, sizeof(fs));
			set_oid(&fs.oid, DN_FS, sizeof(fs));
			fs.fs_nr = i + DN_MAX_ID;
			fs.sched_nr = i;
			s->fs = config_fs(&fs, NULL, 1 /* locked */);
		}
		if (!s->fs) {
			schk_delete_cb(s, (void *)DN_DESTROY);
			D("error creating internal fs for %d", i);
			goto error;
		}
	}
	/* call init function after the flowset is created */
	if (s->fp->config)
		s->fp->config(s);
	update_fs(s);
next:
	if (i < DN_MAX_ID) { /* now configure the FIFO instance */
		i += DN_MAX_ID;
		if (pipe_cmd) {
			/* Restore mask parameter for FIFO */
			a.sch->sched_mask = new_mask;
			a.sch->buckets = new_buckets;
			a.sch->flags = new_flags;
		} else {
			/* sched config shouldn't modify the FIFO scheduler */
			if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) {
				/* FIFO already exists, don't touch it */
				err = 0; /* and this is not an error */
				goto error;
			}
		}
		a.sch->sched_nr = i;
		a.sch->oid.subtype = DN_SCHED_FIFO;
		bzero(a.sch->name, sizeof(a.sch->name));
		goto again;
	}
	err = 0;
error:
	DN_BH_WUNLOCK();
	if (pf)
		free(pf, M_DUMMYNET);
	return err;
}

/*
 * attach a profile to a link
 */
static int
config_profile(struct dn_profile *pf, struct dn_id *arg)
{
	struct dn_schk *s;
	int i, olen, err = 0;

	if (pf->oid.len < sizeof(*pf)) {
		D("short profile len %d", pf->oid.len);
		return EINVAL;
	}
	i = pf->link_nr;
	if (i <= 0 || i >= DN_MAX_ID)
		return EINVAL;
	/* XXX other sanity checks */
	DN_BH_WLOCK();
	for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
		s = locate_scheduler(i);

		if (s == NULL) {
			err = EINVAL;
			break;
		}
		dn_cfg.id++;
		/*
		 * If we had a profile and the new one does not fit,
		 * or it is deleted, then we need to free memory.
		 */
		if (s->profile && (pf->samples_no == 0 ||
		    s->profile->oid.len < pf->oid.len)) {
			free(s->profile, M_DUMMYNET);
			s->profile = NULL;
		}
		if (pf->samples_no == 0)
			continue;
		/*
		 * new profile, possibly allocate memory
		 * and copy data.
		 */
		if (s->profile == NULL)
			s->profile = malloc(pf->oid.len,
			    M_DUMMYNET, M_NOWAIT | M_ZERO);
		if (s->profile == NULL) {
			D("no memory for profile %d", i);
			err = ENOMEM;
			break;
		}
		/* preserve larger length XXX double check */
		olen = s->profile->oid.len;
		if (olen < pf->oid.len)
			olen = pf->oid.len;
		bcopy(pf, s->profile, pf->oid.len);
		s->profile->oid.len = olen;
	}
	DN_BH_WUNLOCK();
	return err;
}

/*
 * Delete all objects:
 */
static void
dummynet_flush(void)
{

	/* delete all schedulers and related links/queues/flowsets */
	dn_ht_scan(dn_cfg.schedhash, schk_delete_cb,
		(void *)(uintptr_t)DN_DELETE_FS);
	/* delete all remaining (unlinked) flowsets */
	DX(4, "still %d unlinked fs", dn_cfg.fsk_count);
	dn_ht_free(dn_cfg.fshash, DNHT_REMOVE);
	fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS);
	/* Reinitialize system heap... */
	heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
}

/*
 * Main handler for configuration. We are guaranteed to be called
 * with an oid which is at least a dn_id.
 * - the first object is the command (config, delete, flush, ...)
 * - config_link must be issued after the corresponding config_sched
 * - parameters (DN_TEXT) for an object must precede the object
 *   processed on a config_sched.
 */
int
do_config(void *p, int l)
{
	struct dn_id *next, *o;
	int err = 0, err2 = 0;
	struct dn_id *arg = NULL;
	uintptr_t *a;

	o = p;
	if (o->id != DN_API_VERSION) {
		D("invalid api version got %d need %d",
			o->id, DN_API_VERSION);
		return EINVAL;
	}
	for (; l >= sizeof(*o); o = next) {
		struct dn_id *prev = arg;
		if (o->len < sizeof(*o) || l < o->len) {
			D("bad len o->len %d len %d", o->len, l);
			err = EINVAL;
			break;
		}
		l -= o->len;
		next = (struct dn_id *)((char *)o + o->len);
		err = 0;
		switch (o->type) {
		default:
			D("cmd %d not implemented", o->type);
			break;

#ifdef EMULATE_SYSCTL
		/* sysctl emulation.
		 * if we recognize the command, jump to the correct
		 * handler and return
		 */
		case DN_SYSCTL_SET:
			err = kesysctl_emu_set(p, l);
			return err;
#endif

		case DN_CMD_CONFIG: /* simply a header */
			break;

		case DN_CMD_DELETE:
			/* the argument is in the first uintptr_t after o */
			a = (uintptr_t *)(o+1);
			if (o->len < sizeof(*o) + sizeof(*a)) {
				err = EINVAL;
				break;
			}
			switch (o->subtype) {
			case DN_LINK:
				/* delete base and derived schedulers */
				DN_BH_WLOCK();
				err = delete_schk(*a);
				err2 = delete_schk(*a + DN_MAX_ID);
				DN_BH_WUNLOCK();
				if (!err)
					err = err2;
				break;

			default:
				D("invalid delete type %d",
					o->subtype);
				err = EINVAL;
				break;

			case DN_FS:
				err = (*a < 1 || *a >= DN_MAX_ID) ?
					EINVAL : delete_fs(*a, 0) ;
				break;
			}
			break;

		case DN_CMD_FLUSH:
			DN_BH_WLOCK();
			dummynet_flush();
			DN_BH_WUNLOCK();
			break;
		case DN_TEXT:	/* store argument for the next block */
			prev = NULL;
			arg = o;
			break;
		case DN_LINK:
			err = config_link((struct dn_link *)o, arg);
			break;
		case DN_PROFILE:
			err = config_profile((struct dn_profile *)o, arg);
			break;
		case DN_SCH:
			err = config_sched((struct dn_sch *)o, arg);
			break;
		case DN_FS:
			err = (NULL == config_fs((struct dn_fs *)o, arg, 0));
			break;
		}
		if (prev)
			arg = NULL;
		if (err != 0)
			break;
	}
	return err;
}

static int
compute_space(struct dn_id *cmd, struct copy_args *a)
{
	int x = 0, need = 0;
	int profile_size = sizeof(struct dn_profile) -
		ED_MAX_SAMPLES_NO*sizeof(int);

	/* NOTE about compute space:
	 * NP	= dn_cfg.schk_count
	 * NSI	= dn_cfg.si_count
	 * NF	= dn_cfg.fsk_count
	 * NQ	= dn_cfg.queue_count
	 * - ipfw pipe show
	 *   (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
	 *	link, scheduler template, flowset
	 *	integrated in scheduler and header
	 *	for flowset list
	 *   (NSI)*(dn_flow) all scheduler instances (includes
	 *	the queue instance)
	 * - ipfw sched show
	 *   (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
	 *	link, scheduler template, flowset
	 *	integrated in scheduler and header
	 *	for flowset list
	 *   (NSI * dn_flow) all scheduler instances
	 *   (NF * sizeof(uint32_t)) space for flowset list linked to scheduler
	 *   (NQ * dn_queue) all queues [XXX for now not listed]
	 * - ipfw queue show
	 *   (NF * dn_fs) all flowsets
	 *   (NQ * dn_queue) all queues
	 */
	switch (cmd->subtype) {
	default:
		return -1;
	/* XXX where do LINK and SCH differ ? */
	/* 'ipfw sched show' could list all queues associated to
	 * a scheduler. This feature for now is disabled
	 */
	case DN_LINK:	/* pipe show */
		x = DN_C_LINK | DN_C_SCH | DN_C_FLOW;
		need += dn_cfg.schk_count *
			(sizeof(struct dn_fs) + profile_size) / 2;
		need += dn_cfg.fsk_count * sizeof(uint32_t);
		break;
	case DN_SCH:	/* sched show */
		need += dn_cfg.schk_count *
			(sizeof(struct dn_fs) + profile_size) / 2;
		need += dn_cfg.fsk_count * sizeof(uint32_t);
		x = DN_C_SCH | DN_C_LINK | DN_C_FLOW;
		break;
	case DN_FS:	/* queue show */
		x = DN_C_FS | DN_C_QUEUE;
		break;
	case DN_GET_COMPAT:	/* compatibility mode */
		need = dn_compat_calc_size();
		break;
	}
	a->flags = x;
	if (x & DN_C_SCH) {
		need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2;
		/* NOTE also, each fs might be attached to a sched */
		need += dn_cfg.schk_count * sizeof(struct dn_id) / 2;
	}
	if (x & DN_C_FS)
		need += dn_cfg.fsk_count * sizeof(struct dn_fs);
	if (x & DN_C_LINK) {
		need += dn_cfg.schk_count * sizeof(struct dn_link) / 2;
	}
	/*
	 * When exporting a queue to userland, only pass up the
	 * struct dn_flow, which is the only visible part.
	 */
	if (x & DN_C_QUEUE)
		need += dn_cfg.queue_count * sizeof(struct dn_flow);
	if (x & DN_C_FLOW)
		need += dn_cfg.si_count * (sizeof(struct dn_flow));
	return need;
}
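/*
 * Recap of the control plane around here: do_config() above consumes a
 * buffer laid out as a sequence of dn_id-prefixed objects -- e.g. a
 * header whose id is DN_API_VERSION, then DN_SCH, DN_LINK and DN_FS
 * objects for a 'pipe config' -- while dummynet_get() below sizes its
 * reply with compute_space() under the lock, allocates with the lock
 * dropped, and retries a few times if objects appear in between.
 */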
/*
 * If compat != NULL dummynet_get is called in compatibility mode.
 * *compat will be the pointer to the buffer to pass to ipfw
 */
int
dummynet_get(struct sockopt *sopt, void **compat)
{
	int have, i, need, error;
	char *start = NULL, *buf;
	size_t sopt_valsize;
	struct dn_id *cmd;
	struct copy_args a;
	struct copy_range r;
	int l = sizeof(struct dn_id);

	bzero(&a, sizeof(a));
	bzero(&r, sizeof(r));

	/* save and restore original sopt_valsize around copyin */
	sopt_valsize = sopt->sopt_valsize;

	cmd = &r.o;

	if (!compat) {
		/* copy at least an oid, and possibly a full object */
		error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd));
		sopt->sopt_valsize = sopt_valsize;
		if (error)
			goto done;
		l = cmd->len;
#ifdef EMULATE_SYSCTL
		/* sysctl emulation. */
		if (cmd->type == DN_SYSCTL_GET)
			return kesysctl_emu_get(sopt);
#endif
		if (l > sizeof(r)) {
			/* request larger than default, allocate buffer */
			cmd = malloc(l, M_DUMMYNET, M_WAITOK);
			error = sooptcopyin(sopt, cmd, l, l);
			sopt->sopt_valsize = sopt_valsize;
			if (error)
				goto done;
		}
	} else { /* compatibility */
		error = 0;
		cmd->type = DN_CMD_GET;
		cmd->len = sizeof(struct dn_id);
		cmd->subtype = DN_GET_COMPAT;
		// cmd->id = sopt_valsize;
		D("compatibility mode");
	}
	a.extra = (struct copy_range *)cmd;
	if (cmd->len == sizeof(*cmd)) { /* no range, create a default */
		uint32_t *rp = (uint32_t *)(cmd + 1);
		cmd->len += 2*sizeof(uint32_t);
		rp[0] = 1;
		rp[1] = DN_MAX_ID - 1;
		if (cmd->subtype == DN_LINK) {
			rp[0] += DN_MAX_ID;
			rp[1] += DN_MAX_ID;
		}
	}
	/* Count space (under lock) and allocate (outside lock).
	 * Exit with lock held if we manage to get enough buffer.
	 * Try a few times then give up.
	 */
	for (have = 0, i = 0; i < 10; i++) {
		DN_BH_WLOCK();
		need = compute_space(cmd, &a);

		/* if there is a range, ignore value from compute_space() */
		if (l > sizeof(*cmd))
			need = sopt_valsize - sizeof(*cmd);

		if (need < 0) {
			DN_BH_WUNLOCK();
			error = EINVAL;
			goto done;
		}
		need += sizeof(*cmd);
		cmd->id = need;
		if (have >= need)
			break;

		DN_BH_WUNLOCK();
		if (start)
			free(start, M_DUMMYNET);
		start = NULL;
		if (need > sopt_valsize)
			break;

		have = need;
		start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO);
	}

	if (start == NULL) {
		if (compat) {
			*compat = NULL;
			error = 1; // XXX
		} else {
			error = sooptcopyout(sopt, cmd, sizeof(*cmd));
		}
		goto done;
	}
	ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, "
		"%d:%d si %d, %d:%d queues %d",
		dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH,
		dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK,
		dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS,
		dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I,
		dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE);
	sopt->sopt_valsize = sopt_valsize;
	a.type = cmd->subtype;

	if (compat == NULL) {
		bcopy(cmd, start, sizeof(*cmd));
		((struct dn_id*)(start))->len = sizeof(struct dn_id);
		buf = start + sizeof(*cmd);
	} else
		buf = start;
	a.start = &buf;
	a.end = start + have;
	/* start copying other objects */
	if (compat) {
		a.type = DN_COMPAT_PIPE;
		dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a);
		a.type = DN_COMPAT_QUEUE;
		dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a);
	} else if (a.type == DN_FS) {
		dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a);
	} else {
		dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a);
	}
	DN_BH_WUNLOCK();

	if (compat) {
		*compat = start;
		sopt->sopt_valsize = buf - start;
		/* free() is done by ip_dummynet_compat() */
		start = NULL; //XXX hack
	} else {
		error = sooptcopyout(sopt, start, buf - start);
	}
done:
	if (cmd && cmd != &r.o)
		free(cmd, M_DUMMYNET);
	if (start)
		free(start, M_DUMMYNET);
	return error;
}

/* Callback called on scheduler instance to delete it if idle */
static int
drain_scheduler_cb(void *_si, void *arg)
{
	struct dn_sch_inst *si = _si;

	if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL)
		return 0;

	if (si->sched->fp->flags & DN_MULTIQUEUE) {
		if (si->q_count == 0)
			return si_destroy(si, NULL);
		else
			return 0;
	} else { /* !DN_MULTIQUEUE */
		if ((si+1)->ni.length == 0)
			return si_destroy(si, NULL);
		else
			return 0;
	}
	return 0;	/* unreachable */
}

/* Callback called on scheduler to check if it has instances */
static int
drain_scheduler_sch_cb(void *_s, void *arg)
{
	struct dn_schk *s = _s;

	if (s->sch.flags & DN_HAVE_MASK) {
		dn_ht_scan_bucket(s->siht, &s->drain_bucket,
			drain_scheduler_cb, NULL);
		s->drain_bucket++;
	} else {
		if (s->siht) {
			if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL)
				s->siht = NULL;
		}
	}
	return 0;
}
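/*
 * Draining is deliberately incremental: each invocation below visits a
 * single hash bucket (the dn_cfg.drain_sch cursor advances on every
 * call), so idle instances are reclaimed over many ticks instead of
 * scanning every scheduler at once.
 */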
/* Callback called on a queue to delete it if idle */
static int
drain_queue_cb(void *_q, void *arg)
{
	struct dn_queue *q = _q;

	if (q->ni.length == 0) {
		dn_delete_queue(q, DN_DESTROY);
		return DNHT_SCAN_DEL;	/* queue is deleted */
	}

	return 0;	/* queue isn't deleted */
}

/* Callback called on a flowset to check if it has queues */
static int
drain_queue_fs_cb(void *_fs, void *arg)
{
	struct dn_fsk *fs = _fs;

	if (fs->fs.flags & DN_QHT_HASH) {
		/* Flowset has a hash table for queues */
		dn_ht_scan_bucket(fs->qht, &fs->drain_bucket,
			drain_queue_cb, NULL);
		fs->drain_bucket++;
	} else {
		/* No hash table for this flowset, null the pointer
		 * if the queue is deleted
		 */
		if (fs->qht) {
			if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL)
				fs->qht = NULL;
		}
	}
	return 0;
}

/* Called every tick, try to delete a 'bucket' of queues */
void
dn_drain_queue(void)
{
	/* scan a bucket of flowsets */
	dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs,
		drain_queue_fs_cb, NULL);
	dn_cfg.drain_fs++;
}

/*
 * Handler for the various dummynet socket options
 */
static int
ip_dn_ctl(struct sockopt *sopt)
{
	void *p = NULL;
	int error, l;

	error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET);
	if (error)
		return (error);

	/* Disallow sets in really-really secure mode. */
	if (sopt->sopt_dir == SOPT_SET) {
		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
		if (error)
			return (error);
	}

	switch (sopt->sopt_name) {
	default:
		D("dummynet: unknown option %d", sopt->sopt_name);
		error = EINVAL;
		break;

	case IP_DUMMYNET_FLUSH:
	case IP_DUMMYNET_CONFIGURE:
	case IP_DUMMYNET_DEL:	/* remove a pipe or queue */
	case IP_DUMMYNET_GET:
		D("dummynet: compat option %d", sopt->sopt_name);
		error = ip_dummynet_compat(sopt);
		break;

	case IP_DUMMYNET3:
		if (sopt->sopt_dir == SOPT_GET) {
			error = dummynet_get(sopt, NULL);
			break;
		}
		l = sopt->sopt_valsize;
		if (l < sizeof(struct dn_id) || l > 12000) {
			D("argument len %d invalid", l);
			error = EINVAL;
			break;
		}
		p = malloc(l, M_TEMP, M_WAITOK); /* with M_WAITOK this cannot fail */
		error = sooptcopyin(sopt, p, l, l);
		if (error)
			break;
		error = do_config(p, l);
		break;
	}

	if (p != NULL)
		free(p, M_TEMP);

	return error;
}
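/*
 * Example (a sketch, not compiled in): the smallest useful SET
 * request through the IP_DUMMYNET3 path above, a whole-config
 * flush. do_config() expects the leading dn_id to carry
 * DN_API_VERSION in its id field. The helper name, the socket
 * argument and the missing error handling are assumptions for
 * illustration.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip_dummynet.h>
#include <string.h>

int
example_flush(int rawsock)
{
	struct dn_id oid;

	memset(&oid, 0, sizeof(oid));
	oid.len = sizeof(oid);
	oid.type = DN_CMD_FLUSH;
	oid.id = DN_API_VERSION;	/* rejected on mismatch */
	return (setsockopt(rawsock, IPPROTO_IP, IP_DUMMYNET3,
	    &oid, sizeof(oid)));
}
#endif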
static void
ip_dn_init(void)
{
	if (dn_cfg.init_done)
		return;
	printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet);
	dn_cfg.init_done = 1;
	/* Set defaults here. MSVC does not accept initializers,
	 * and this is also useful for vimages.
	 */
	/* queue limits */
	dn_cfg.slot_limit = 100;	/* Foot shooting limit for queues. */
	dn_cfg.byte_limit = 1024 * 1024;
	dn_cfg.expire = 1;

	/* RED parameters */
	dn_cfg.red_lookup_depth = 256;	/* default lookup table depth */
	dn_cfg.red_avg_pkt_size = 512;	/* default medium packet size */
	dn_cfg.red_max_pkt_size = 1500;	/* default max packet size */

	/* hash tables */
	dn_cfg.max_hash_size = 65536;	/* max in the hash tables */
	dn_cfg.hash_size = 64;		/* default hash size */

	/* Create hash tables for schedulers and flowsets.
	 * In both we search by key and by pointer.
	 */
	dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size,
		offsetof(struct dn_schk, schk_next),
		schk_hash, schk_match, schk_new);
	dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size,
		offsetof(struct dn_fsk, fsk_next),
		fsk_hash, fsk_match, fsk_new);

	/* bucket index to drain object */
	dn_cfg.drain_fs = 0;
	dn_cfg.drain_sch = 0;

	heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
	SLIST_INIT(&dn_cfg.fsu);
	SLIST_INIT(&dn_cfg.schedlist);

	DN_LOCK_INIT();

	TASK_INIT(&dn_task, 0, dummynet_task, curvnet);
	dn_tq = taskqueue_create_fast("dummynet", M_WAITOK,
		taskqueue_thread_enqueue, &dn_tq);
	taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");

	callout_init(&dn_timeout, CALLOUT_MPSAFE);
	dn_reschedule();

	/* Initialize curr_time adjustment mechanics. */
	getmicrouptime(&dn_cfg.prev_t);
}

static void
ip_dn_destroy(int last)
{
	callout_drain(&dn_timeout);

	DN_BH_WLOCK();
	if (last) {
		ND("removing last instance\n");
		ip_dn_ctl_ptr = NULL;
		ip_dn_io_ptr = NULL;
	}

	dummynet_flush();
	DN_BH_WUNLOCK();
	taskqueue_drain(dn_tq, &dn_task);
	taskqueue_free(dn_tq);

	dn_ht_free(dn_cfg.schedhash, 0);
	dn_ht_free(dn_cfg.fshash, 0);
	heap_free(&dn_cfg.evheap);

	DN_LOCK_DESTROY();
}

static int
dummynet_modevent(module_t mod, int type, void *data)
{

	if (type == MOD_LOAD) {
		if (ip_dn_io_ptr) {
			printf("DUMMYNET already loaded\n");
			return EEXIST;
		}
		ip_dn_init();
		ip_dn_ctl_ptr = ip_dn_ctl;
		ip_dn_io_ptr = dummynet_io;
		return 0;
	} else if (type == MOD_UNLOAD) {
		ip_dn_destroy(1 /* last */);
		return 0;
	} else
		return EOPNOTSUPP;
}

/* modevent helpers for the scheduler modules */
static int
load_dn_sched(struct dn_alg *d)
{
	struct dn_alg *s;

	if (d == NULL)
		return 1; /* error */
	ip_dn_init();	/* just in case, we need the lock */

	/* Check that the mandatory functions exist */
	if (d->enqueue == NULL || d->dequeue == NULL) {
		D("missing enqueue or dequeue for %s", d->name);
		return 1;
	}

	/* Check whether the scheduler is already loaded */
	DN_BH_WLOCK();
	SLIST_FOREACH(s, &dn_cfg.schedlist, next) {
		if (strcmp(s->name, d->name) == 0) {
			D("%s already loaded", d->name);
			break;	/* scheduler already exists */
		}
	}
	if (s == NULL)
		SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next);
	DN_BH_WUNLOCK();
	D("dn_sched %s %sloaded", d->name, s ? "not " : "");
	return s ? 1 : 0;
}
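/*
 * A packet scheduler ships as its own kernel module and registers
 * through dn_sched_modevent() below, passing a filled-in struct
 * dn_alg. A trimmed sketch of such a declaration; the type number,
 * names and callbacks are hypothetical, and only enqueue/dequeue
 * are mandatory (see load_dn_sched() above). The _SI() wrapper and
 * the DECLARE_DNSCHED_MODULE() helper come from dn_sched.h.
 */
#if 0
#define DN_SCHED_MYSCHED	100	/* hypothetical type number */

static struct dn_alg mysched_desc = {
	_SI( .type = )		DN_SCHED_MYSCHED,
	_SI( .name = )		"MYSCHED",
	_SI( .flags = )		0,
	_SI( .enqueue = )	mysched_enqueue,	/* mandatory */
	_SI( .dequeue = )	mysched_dequeue,	/* mandatory */
	/* the remaining callbacks may be left NULL */
};

DECLARE_DNSCHED_MODULE(dn_mysched, &mysched_desc);
#endif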
static int
unload_dn_sched(struct dn_alg *s)
{
	struct dn_alg *tmp, *r;
	int err = EINVAL;

	ND("called for %s", s->name);

	DN_BH_WLOCK();
	SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) {
		if (strcmp(s->name, r->name) != 0)
			continue;
		ND("ref_count = %d", r->ref_count);
		err = (r->ref_count != 0) ? EBUSY : 0;
		if (err == 0)
			SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next);
		break;
	}
	DN_BH_WUNLOCK();
	D("dn_sched %s %sunloaded", s->name, err ? "not " : "");
	return err;
}

int
dn_sched_modevent(module_t mod, int cmd, void *arg)
{
	struct dn_alg *sch = arg;

	if (cmd == MOD_LOAD)
		return load_dn_sched(sch);
	else if (cmd == MOD_UNLOAD)
		return unload_dn_sched(sch);
	else
		return EINVAL;
}

static moduledata_t dummynet_mod = {
	"dummynet", dummynet_modevent, NULL
};

#define	DN_SI_SUB	SI_SUB_PROTO_IFATTACHDOMAIN
#define	DN_MODEV_ORD	(SI_ORDER_ANY - 128) /* after ipfw */
DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD);
MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
MODULE_VERSION(dummynet, 3);

/*
 * Starting up. Done in order after dummynet_modevent() has been called.
 * VNET_SYSINIT is also called for each existing vnet and each new vnet.
 */
//VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL);

/*
 * Shutdown handlers close up shop. These are done in REVERSE ORDER, but
 * still after dummynet_modevent() has been called. Not called on reboot.
 * VNET_SYSUNINIT is also called for each exiting vnet as it exits,
 * or when the module is unloaded.
 */
//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL);

/* end of file */