1 /* 2 * Copyright (c) 1998-2000 Luigi Rizzo, Universita` di Pisa 3 * Portions Copyright (c) 2000 Akamba Corp. 4 * All rights reserved 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 30 #ifndef _IP_DUMMYNET_H 31 #define _IP_DUMMYNET_H 32 33 /* 34 * Definition of dummynet data structures. 35 * We first start with the heap which is used by the scheduler. 36 * 37 * Each list contains a set of parameters identifying the pipe, and 38 * a set of packets queued on the pipe itself. 39 * 40 * I could have used queue macros, but the management i have 41 * is pretty simple and this makes the code more portable. 42 */ 43 44 /* 45 * The key for the heap is used for two different values 46 1. timer ticks- max 10K/second, so 32 bits are enough 47 2. virtual times. These increase in steps of len/x, where len is the 48 packet length, and x is either the weight of the flow, or the 49 sum of all weights. 50 If we limit to max 1000 flows and a max weight of 100, then 51 x needs 17 bits. The packet size is 16 bits, so we can easily 52 overflow if we do not allow errors. 53 54 */ 55 typedef u_int64_t dn_key ; /* sorting key */ 56 #define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) 57 #define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) 58 #define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0) 59 #define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0) 60 /* XXX check names of next two macros */ 61 #define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) 62 #define MY_M 16 /* number of left shift to obtain a larger precision */ 63 /* 64 * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the 65 * virtual time wraps every 15 days. 66 */ 67 68 #define OFFSET_OF(type, field) ((int)&( ((type *)0)->field) ) 69 70 struct dn_heap_entry { 71 dn_key key ; /* sorting key. Topmost element is smallest one */ 72 void *object ; /* object pointer */ 73 } ; 74 75 struct dn_heap { 76 int size ; 77 int elements ; 78 int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ 79 struct dn_heap_entry *p ; /* really an array of "size" entries */ 80 } ; 81 82 /* 83 * MT_DUMMYNET is a new (fake) mbuf type that is prepended to the 84 * packet when it comes out of a pipe. The definition 85 * ought to go in /sys/sys/mbuf.h but here it is less intrusive. 86 */ 87 88 #define MT_DUMMYNET MT_CONTROL 89 90 91 /* 92 * struct dn_pkt identifies a packet in the dummynet queue. The 93 * first part is really an m_hdr for implementation purposes, and some 94 * fields are saved there. When passing the packet back to the ip_input/ 95 * ip_output(), the struct is prepended to the mbuf chain with type 96 * MT_DUMMYNET, and contains the pointer to the matching rule. 97 */ 98 struct dn_pkt { 99 struct m_hdr hdr ; 100 #define dn_next hdr.mh_nextpkt /* next element in queue */ 101 #define DN_NEXT(x) (struct dn_pkt *)(x)->dn_next 102 #define dn_m hdr.mh_next /* packet to be forwarded */ 103 #define dn_dir hdr.mh_flags /* action when pkt extracted from a queue */ 104 #define DN_TO_IP_OUT 1 105 #define DN_TO_IP_IN 2 106 #define DN_TO_BDG_FWD 3 107 108 dn_key output_time; /* when the pkt is due for delivery */ 109 struct ifnet *ifp; /* interface, for ip_output */ 110 struct sockaddr_in *dn_dst ; 111 struct route ro; /* route, for ip_output. MUST COPY */ 112 int flags ; /* flags, for ip_output (IPv6 ?) */ 113 }; 114 115 /* 116 * Overall structure (with WFQ): 117 118 We have 3 data structures definining a pipe and associated queues: 119 + dn_pipe, which contains the main configuration parameters related 120 to delay and bandwidth 121 + dn_flow_set which contains WFQ configuration, flow 122 masks, plr and RED configuration 123 + dn_flow_queue which is the per-flow queue. 124 Multiple dn_flow_set can be linked to the same pipe, and multiple 125 dn_flow_queue can be linked to the same dn_flow_set. 126 127 During configuration we set the dn_flow_set and dn_pipe parameters. 128 At runtime: packets are sent to the dn_flow_set (either WFQ ones, or 129 the one embedded in the dn_pipe for fixed-rate flows) which in turn 130 dispatches them to the appropriate dn_flow_queue (created dynamically 131 according to the masks). 132 The transmit clock for fixed rate flows (ready_event) selects the 133 dn_flow_queue to be used to transmit the next packet. For WF2Q, 134 wfq_ready_event() extract a pipe which in turn selects the right 135 flow using a number of heaps defined into the pipe. 136 137 * 138 */ 139 140 /* 141 * We use per flow queues. Hashing is used to select the right slot, 142 * then we scan the list to match the flow-id. 143 */ 144 struct dn_flow_queue { 145 struct dn_flow_queue *next ; 146 struct ipfw_flow_id id ; 147 struct dn_pkt *head, *tail ; /* queue of packets */ 148 u_int len ; 149 u_int len_bytes ; 150 long numbytes ; /* credit for transmission (dynamic queues) */ 151 152 u_int64_t tot_pkts ; /* statistics counters */ 153 u_int64_t tot_bytes ; 154 u_int32_t drops ; 155 int hash_slot ; /* debugging/diagnostic */ 156 157 /* RED parameters */ 158 int avg ; /* average queue length est. (scaled) */ 159 int count ; /* arrivals since last RED drop */ 160 int random ; /* random value (scaled) */ 161 u_int32_t q_time ; /* start of queue idle time */ 162 163 /* WF2Q+ support */ 164 struct dn_flow_set *fs ; /* parent flow set */ 165 int blh_pos ; /* position in backlogged_heap */ 166 dn_key sched_time ; /* current time when queue enters ready_heap */ 167 168 dn_key S,F ; /* start-time, finishing time */ 169 } ; 170 171 struct dn_flow_set { 172 struct dn_flow_set *next; /* next flow set in all_flow_sets list */ 173 174 u_short fs_nr ; /* flow_set number */ 175 u_short flags_fs; 176 #define DN_HAVE_FLOW_MASK 0x0001 177 #define DN_IS_PIPE 0x4000 178 #define DN_IS_QUEUE 0x8000 179 #define DN_IS_RED 0x0002 180 #define DN_IS_GENTLE_RED 0x0004 181 #define DN_QSIZE_IS_BYTES 0x0008 /* queue measured in bytes */ 182 183 struct dn_pipe *pipe ; /* pointer to parent pipe */ 184 u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ 185 186 int weight ; /* WFQ queue weight */ 187 int qsize ; /* queue size in slots or bytes */ 188 int plr ; /* pkt loss rate (2^31-1 means 100%) */ 189 190 struct ipfw_flow_id flow_mask ; 191 /* hash table of queues onto this flow_set */ 192 int rq_size ; /* number of slots */ 193 int rq_elements ; /* active elements */ 194 struct dn_flow_queue **rq; /* array of rq_size entries */ 195 u_int32_t last_expired ; /* do not expire too frequently */ 196 /* XXX some RED parameters as well ? */ 197 int backlogged ; /* #active queues for this flowset */ 198 199 /* RED parameters */ 200 #define SCALE_RED 16 201 #define SCALE(x) ( (x) << SCALE_RED ) 202 #define SCALE_VAL(x) ( (x) >> SCALE_RED ) 203 #define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) 204 int w_q ; /* queue weight (scaled) */ 205 int max_th ; /* maximum threshold for queue (scaled) */ 206 int min_th ; /* minimum threshold for queue (scaled) */ 207 int max_p ; /* maximum value for p_b (scaled) */ 208 u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ 209 u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ 210 u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ 211 u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ 212 u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ 213 u_int lookup_depth ; /* depth of lookup table */ 214 int lookup_step ; /* granularity inside the lookup table */ 215 int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ 216 int avg_pkt_size ; /* medium packet size */ 217 int max_pkt_size ; /* max packet size */ 218 } ; 219 220 /* 221 * Pipe descriptor. Contains global parameters, delay-line queue. 222 * 223 * For WF2Q support it also has 3 heaps holding dn_flow_queue: 224 * not_eligible_heap, for queues whose start time is higher 225 * than the virtual time. Sorted by start time. 226 * scheduler_heap, for queues eligible for scheduling. Sorted by 227 * finish time. 228 * backlogged_heap, all flows in the two heaps above, sorted by 229 * start time. This is used to compute the virtual time. 230 * 231 */ 232 struct dn_pipe { /* a pipe */ 233 struct dn_pipe *next ; 234 235 int pipe_nr ; /* number */ 236 int bandwidth; /* really, bytes/tick. */ 237 int delay ; /* really, ticks */ 238 239 struct dn_pkt *head, *tail ; /* packets in delay line */ 240 241 /* WF2Q+ */ 242 struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ 243 struct dn_heap not_eligible_heap; /* top extract- key Start time */ 244 struct dn_heap backlogged_heap ; /* random extract - key Start time */ 245 246 dn_key V ; /* virtual time */ 247 int sum; /* sum of weights of all active sessions */ 248 int numbytes; /* bit i can transmit (more or less). */ 249 250 dn_key sched_time ; /* first time pipe is scheduled in ready_heap */ 251 252 /* the tx clock can come from an interface. In this case, the 253 * name is below, and the pointer is filled when the rule is 254 * configured. We identify this by setting the if_name to a 255 * non-empty string. 256 */ 257 char if_name[16]; 258 struct ifnet *ifp ; 259 int ready ; /* set if ifp != NULL and we got a signal from it */ 260 261 struct dn_flow_set fs ; /* used with fixed-rate flows */ 262 }; 263 264 #ifdef _KERNEL 265 266 MALLOC_DECLARE(M_IPFW); 267 268 typedef int ip_dn_ctl_t __P((struct sockopt *)) ; 269 extern ip_dn_ctl_t *ip_dn_ctl_ptr; 270 271 void dn_rule_delete(void *r); /* used in ip_fw.c */ 272 int dummynet_io(int pipe, int dir, 273 struct mbuf *m, struct ifnet *ifp, struct route *ro, 274 struct sockaddr_in * dst, 275 struct ip_fw_chain *rule, int flags); 276 #endif 277 278 #endif /* _IP_DUMMYNET_H */ 279