1da8ae05dSGleb Smirnoff /*-
2772e66a6SGleb Smirnoff * Copyright (C) 1997-2003
3772e66a6SGleb Smirnoff * Sony Computer Science Laboratories Inc. All rights reserved.
4772e66a6SGleb Smirnoff *
5772e66a6SGleb Smirnoff * Redistribution and use in source and binary forms, with or without
6772e66a6SGleb Smirnoff * modification, are permitted provided that the following conditions
7772e66a6SGleb Smirnoff * are met:
8772e66a6SGleb Smirnoff * 1. Redistributions of source code must retain the above copyright
9772e66a6SGleb Smirnoff * notice, this list of conditions and the following disclaimer.
10772e66a6SGleb Smirnoff * 2. Redistributions in binary form must reproduce the above copyright
11772e66a6SGleb Smirnoff * notice, this list of conditions and the following disclaimer in the
12772e66a6SGleb Smirnoff * documentation and/or other materials provided with the distribution.
13772e66a6SGleb Smirnoff *
14772e66a6SGleb Smirnoff * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
15772e66a6SGleb Smirnoff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16772e66a6SGleb Smirnoff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17772e66a6SGleb Smirnoff * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
18772e66a6SGleb Smirnoff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19772e66a6SGleb Smirnoff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20772e66a6SGleb Smirnoff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21772e66a6SGleb Smirnoff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22772e66a6SGleb Smirnoff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23772e66a6SGleb Smirnoff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24772e66a6SGleb Smirnoff * SUCH DAMAGE.
25772e66a6SGleb Smirnoff *
26772e66a6SGleb Smirnoff */
27da8ae05dSGleb Smirnoff /*-
28772e66a6SGleb Smirnoff * Copyright (c) 1990-1994 Regents of the University of California.
29772e66a6SGleb Smirnoff * All rights reserved.
30772e66a6SGleb Smirnoff *
31772e66a6SGleb Smirnoff * Redistribution and use in source and binary forms, with or without
32772e66a6SGleb Smirnoff * modification, are permitted provided that the following conditions
33772e66a6SGleb Smirnoff * are met:
34772e66a6SGleb Smirnoff * 1. Redistributions of source code must retain the above copyright
35772e66a6SGleb Smirnoff * notice, this list of conditions and the following disclaimer.
36772e66a6SGleb Smirnoff * 2. Redistributions in binary form must reproduce the above copyright
37772e66a6SGleb Smirnoff * notice, this list of conditions and the following disclaimer in the
38772e66a6SGleb Smirnoff * documentation and/or other materials provided with the distribution.
39772e66a6SGleb Smirnoff * 3. All advertising materials mentioning features or use of this software
40772e66a6SGleb Smirnoff * must display the following acknowledgement:
41772e66a6SGleb Smirnoff * This product includes software developed by the Computer Systems
42772e66a6SGleb Smirnoff * Engineering Group at Lawrence Berkeley Laboratory.
43772e66a6SGleb Smirnoff * 4. Neither the name of the University nor of the Laboratory may be used
44772e66a6SGleb Smirnoff * to endorse or promote products derived from this software without
45772e66a6SGleb Smirnoff * specific prior written permission.
46772e66a6SGleb Smirnoff *
47772e66a6SGleb Smirnoff * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48772e66a6SGleb Smirnoff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49772e66a6SGleb Smirnoff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50772e66a6SGleb Smirnoff * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51772e66a6SGleb Smirnoff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52772e66a6SGleb Smirnoff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53772e66a6SGleb Smirnoff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54772e66a6SGleb Smirnoff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55772e66a6SGleb Smirnoff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56772e66a6SGleb Smirnoff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57772e66a6SGleb Smirnoff * SUCH DAMAGE.
58da8ae05dSGleb Smirnoff *
59da8ae05dSGleb Smirnoff * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $
60772e66a6SGleb Smirnoff */
61772e66a6SGleb Smirnoff
62772e66a6SGleb Smirnoff #include "opt_altq.h"
63772e66a6SGleb Smirnoff #include "opt_inet.h"
64772e66a6SGleb Smirnoff #include "opt_inet6.h"
65772e66a6SGleb Smirnoff #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
66772e66a6SGleb Smirnoff
67772e66a6SGleb Smirnoff #include <sys/param.h>
68772e66a6SGleb Smirnoff #include <sys/malloc.h>
69772e66a6SGleb Smirnoff #include <sys/mbuf.h>
70772e66a6SGleb Smirnoff #include <sys/socket.h>
71772e66a6SGleb Smirnoff #include <sys/systm.h>
72772e66a6SGleb Smirnoff #include <sys/errno.h>
73772e66a6SGleb Smirnoff #if 1 /* ALTQ3_COMPAT */
74772e66a6SGleb Smirnoff #include <sys/sockio.h>
75772e66a6SGleb Smirnoff #include <sys/proc.h>
76772e66a6SGleb Smirnoff #include <sys/kernel.h>
77772e66a6SGleb Smirnoff #ifdef ALTQ_FLOWVALVE
78772e66a6SGleb Smirnoff #include <sys/queue.h>
79772e66a6SGleb Smirnoff #include <sys/time.h>
80772e66a6SGleb Smirnoff #endif
81772e66a6SGleb Smirnoff #endif /* ALTQ3_COMPAT */
82772e66a6SGleb Smirnoff
83772e66a6SGleb Smirnoff #include <net/if.h>
84772e66a6SGleb Smirnoff #include <net/if_var.h>
85772e66a6SGleb Smirnoff
86772e66a6SGleb Smirnoff #include <netinet/in.h>
87772e66a6SGleb Smirnoff #include <netinet/in_systm.h>
88772e66a6SGleb Smirnoff #include <netinet/ip.h>
89772e66a6SGleb Smirnoff #ifdef INET6
90772e66a6SGleb Smirnoff #include <netinet/ip6.h>
91772e66a6SGleb Smirnoff #endif
92772e66a6SGleb Smirnoff
93772e66a6SGleb Smirnoff #include <netpfil/pf/pf.h>
94772e66a6SGleb Smirnoff #include <netpfil/pf/pf_altq.h>
95772e66a6SGleb Smirnoff #include <netpfil/pf/pf_mtag.h>
96772e66a6SGleb Smirnoff #include <net/altq/altq.h>
97772e66a6SGleb Smirnoff #include <net/altq/altq_red.h>
98772e66a6SGleb Smirnoff
99772e66a6SGleb Smirnoff /*
100772e66a6SGleb Smirnoff * ALTQ/RED (Random Early Detection) implementation using 32-bit
101772e66a6SGleb Smirnoff * fixed-point calculation.
102772e66a6SGleb Smirnoff *
103772e66a6SGleb Smirnoff * written by kjc using the ns code as a reference.
104772e66a6SGleb Smirnoff * you can learn more about red and ns from Sally's home page at
105772e66a6SGleb Smirnoff * http://www-nrg.ee.lbl.gov/floyd/
106772e66a6SGleb Smirnoff *
107772e66a6SGleb Smirnoff * most of the red parameter values are fixed in this implementation
108772e66a6SGleb Smirnoff * to prevent fixed-point overflow/underflow.
109772e66a6SGleb Smirnoff * if you change the parameters, watch out for overflow/underflow!
110772e66a6SGleb Smirnoff *
111772e66a6SGleb Smirnoff * the parameters used are recommended values by Sally.
112772e66a6SGleb Smirnoff * the corresponding ns config looks:
113772e66a6SGleb Smirnoff * q_weight=0.00195
114772e66a6SGleb Smirnoff * minthresh=5 maxthresh=15 queue-size=60
115772e66a6SGleb Smirnoff * linterm=30
116772e66a6SGleb Smirnoff * dropmech=drop-tail
117772e66a6SGleb Smirnoff * bytes=false (can't be handled by 32-bit fixed-point)
118772e66a6SGleb Smirnoff * doubleq=false dqthresh=false
119772e66a6SGleb Smirnoff * wait=true
120772e66a6SGleb Smirnoff */
121772e66a6SGleb Smirnoff /*
122772e66a6SGleb Smirnoff * alternative red parameters for a slow link.
123772e66a6SGleb Smirnoff *
124772e66a6SGleb Smirnoff * assume the queue length becomes from zero to L and keeps L, it takes
125772e66a6SGleb Smirnoff * N packets for q_avg to reach 63% of L.
126772e66a6SGleb Smirnoff * when q_weight is 0.002, N is about 500 packets.
127772e66a6SGleb Smirnoff * for a slow link like dial-up, 500 packets takes more than 1 minute!
128772e66a6SGleb Smirnoff * when q_weight is 0.008, N is about 127 packets.
129772e66a6SGleb Smirnoff * when q_weight is 0.016, N is about 63 packets.
130772e66a6SGleb Smirnoff * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
131772e66a6SGleb Smirnoff * are allowed for 0.016.
132772e66a6SGleb Smirnoff * see Sally's paper for more details.
133772e66a6SGleb Smirnoff */
134772e66a6SGleb Smirnoff /* normal red parameters */
135772e66a6SGleb Smirnoff #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
136772e66a6SGleb Smirnoff /* q_weight = 0.00195 */
137772e66a6SGleb Smirnoff
138772e66a6SGleb Smirnoff /* red parameters for a slow link */
139772e66a6SGleb Smirnoff #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
140772e66a6SGleb Smirnoff /* q_weight = 0.0078125 */
141772e66a6SGleb Smirnoff
142772e66a6SGleb Smirnoff /* red parameters for a very slow link (e.g., dialup) */
143772e66a6SGleb Smirnoff #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
144772e66a6SGleb Smirnoff /* q_weight = 0.015625 */
145772e66a6SGleb Smirnoff
146772e66a6SGleb Smirnoff /* fixed-point uses 12-bit decimal places */
147772e66a6SGleb Smirnoff #define FP_SHIFT 12 /* fixed-point shift */
148772e66a6SGleb Smirnoff
149772e66a6SGleb Smirnoff /* red parameters for drop probability */
150772e66a6SGleb Smirnoff #define INV_P_MAX 10 /* inverse of max drop probability */
151772e66a6SGleb Smirnoff #define TH_MIN 5 /* min threshold */
152772e66a6SGleb Smirnoff #define TH_MAX 15 /* max threshold */
153772e66a6SGleb Smirnoff
154a4641f4eSPedro F. Giffuni #define RED_LIMIT 60 /* default max queue length */
155772e66a6SGleb Smirnoff #define RED_STATS /* collect statistics */
156772e66a6SGleb Smirnoff
157772e66a6SGleb Smirnoff /*
158772e66a6SGleb Smirnoff * our default policy for forced-drop is drop-tail.
159772e66a6SGleb Smirnoff * (in altq-1.1.2 or earlier, the default was random-drop.
160772e66a6SGleb Smirnoff * but it makes more sense to punish the cause of the surge.)
161772e66a6SGleb Smirnoff * to switch to the random-drop policy, define "RED_RANDOM_DROP".
162772e66a6SGleb Smirnoff */
163772e66a6SGleb Smirnoff
164772e66a6SGleb Smirnoff /* default red parameter values */
165772e66a6SGleb Smirnoff static int default_th_min = TH_MIN;
166772e66a6SGleb Smirnoff static int default_th_max = TH_MAX;
167772e66a6SGleb Smirnoff static int default_inv_pmax = INV_P_MAX;
168772e66a6SGleb Smirnoff
169772e66a6SGleb Smirnoff /*
170772e66a6SGleb Smirnoff * red support routines
171772e66a6SGleb Smirnoff */
172772e66a6SGleb Smirnoff red_t *
red_alloc(int weight,int inv_pmax,int th_min,int th_max,int flags,int pkttime)173772e66a6SGleb Smirnoff red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
174772e66a6SGleb Smirnoff int pkttime)
175772e66a6SGleb Smirnoff {
176772e66a6SGleb Smirnoff red_t *rp;
177772e66a6SGleb Smirnoff int w, i;
178772e66a6SGleb Smirnoff int npkts_per_sec;
179772e66a6SGleb Smirnoff
180772e66a6SGleb Smirnoff rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
181772e66a6SGleb Smirnoff if (rp == NULL)
182772e66a6SGleb Smirnoff return (NULL);
183772e66a6SGleb Smirnoff
184772e66a6SGleb Smirnoff if (weight == 0)
185772e66a6SGleb Smirnoff rp->red_weight = W_WEIGHT;
186772e66a6SGleb Smirnoff else
187772e66a6SGleb Smirnoff rp->red_weight = weight;
188772e66a6SGleb Smirnoff
189772e66a6SGleb Smirnoff /* allocate weight table */
190772e66a6SGleb Smirnoff rp->red_wtab = wtab_alloc(rp->red_weight);
191772e66a6SGleb Smirnoff if (rp->red_wtab == NULL) {
192772e66a6SGleb Smirnoff free(rp, M_DEVBUF);
193772e66a6SGleb Smirnoff return (NULL);
194772e66a6SGleb Smirnoff }
195772e66a6SGleb Smirnoff
196772e66a6SGleb Smirnoff rp->red_avg = 0;
197772e66a6SGleb Smirnoff rp->red_idle = 1;
198772e66a6SGleb Smirnoff
199772e66a6SGleb Smirnoff if (inv_pmax == 0)
200772e66a6SGleb Smirnoff rp->red_inv_pmax = default_inv_pmax;
201772e66a6SGleb Smirnoff else
202772e66a6SGleb Smirnoff rp->red_inv_pmax = inv_pmax;
203772e66a6SGleb Smirnoff if (th_min == 0)
204772e66a6SGleb Smirnoff rp->red_thmin = default_th_min;
205772e66a6SGleb Smirnoff else
206772e66a6SGleb Smirnoff rp->red_thmin = th_min;
207772e66a6SGleb Smirnoff if (th_max == 0)
208772e66a6SGleb Smirnoff rp->red_thmax = default_th_max;
209772e66a6SGleb Smirnoff else
210772e66a6SGleb Smirnoff rp->red_thmax = th_max;
211772e66a6SGleb Smirnoff
212772e66a6SGleb Smirnoff rp->red_flags = flags;
213772e66a6SGleb Smirnoff
214772e66a6SGleb Smirnoff if (pkttime == 0)
215772e66a6SGleb Smirnoff /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
216772e66a6SGleb Smirnoff rp->red_pkttime = 800;
217772e66a6SGleb Smirnoff else
218772e66a6SGleb Smirnoff rp->red_pkttime = pkttime;
219772e66a6SGleb Smirnoff
220772e66a6SGleb Smirnoff if (weight == 0) {
221772e66a6SGleb Smirnoff /* when the link is very slow, adjust red parameters */
222772e66a6SGleb Smirnoff npkts_per_sec = 1000000 / rp->red_pkttime;
223772e66a6SGleb Smirnoff if (npkts_per_sec < 50) {
224772e66a6SGleb Smirnoff /* up to about 400Kbps */
225772e66a6SGleb Smirnoff rp->red_weight = W_WEIGHT_2;
226772e66a6SGleb Smirnoff } else if (npkts_per_sec < 300) {
227772e66a6SGleb Smirnoff /* up to about 2.4Mbps */
228772e66a6SGleb Smirnoff rp->red_weight = W_WEIGHT_1;
229772e66a6SGleb Smirnoff }
230772e66a6SGleb Smirnoff }
231772e66a6SGleb Smirnoff
232772e66a6SGleb Smirnoff /* calculate wshift. weight must be power of 2 */
233772e66a6SGleb Smirnoff w = rp->red_weight;
234772e66a6SGleb Smirnoff for (i = 0; w > 1; i++)
235772e66a6SGleb Smirnoff w = w >> 1;
236772e66a6SGleb Smirnoff rp->red_wshift = i;
237772e66a6SGleb Smirnoff w = 1 << rp->red_wshift;
238772e66a6SGleb Smirnoff if (w != rp->red_weight) {
239772e66a6SGleb Smirnoff printf("invalid weight value %d for red! use %d\n",
240772e66a6SGleb Smirnoff rp->red_weight, w);
241772e66a6SGleb Smirnoff rp->red_weight = w;
242772e66a6SGleb Smirnoff }
243772e66a6SGleb Smirnoff
244772e66a6SGleb Smirnoff /*
245772e66a6SGleb Smirnoff * thmin_s and thmax_s are scaled versions of th_min and th_max
246772e66a6SGleb Smirnoff * to be compared with avg.
247772e66a6SGleb Smirnoff */
248772e66a6SGleb Smirnoff rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
249772e66a6SGleb Smirnoff rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
250772e66a6SGleb Smirnoff
251772e66a6SGleb Smirnoff /*
252772e66a6SGleb Smirnoff * precompute probability denominator
253772e66a6SGleb Smirnoff * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
254772e66a6SGleb Smirnoff */
255772e66a6SGleb Smirnoff rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
256772e66a6SGleb Smirnoff * rp->red_inv_pmax) << FP_SHIFT;
257772e66a6SGleb Smirnoff
258772e66a6SGleb Smirnoff microtime(&rp->red_last);
259772e66a6SGleb Smirnoff return (rp);
260772e66a6SGleb Smirnoff }
261772e66a6SGleb Smirnoff
262772e66a6SGleb Smirnoff void
red_destroy(red_t * rp)263772e66a6SGleb Smirnoff red_destroy(red_t *rp)
264772e66a6SGleb Smirnoff {
265772e66a6SGleb Smirnoff wtab_destroy(rp->red_wtab);
266772e66a6SGleb Smirnoff free(rp, M_DEVBUF);
267772e66a6SGleb Smirnoff }
268772e66a6SGleb Smirnoff
269772e66a6SGleb Smirnoff void
red_getstats(red_t * rp,struct redstats * sp)270772e66a6SGleb Smirnoff red_getstats(red_t *rp, struct redstats *sp)
271772e66a6SGleb Smirnoff {
272772e66a6SGleb Smirnoff sp->q_avg = rp->red_avg >> rp->red_wshift;
273772e66a6SGleb Smirnoff sp->xmit_cnt = rp->red_stats.xmit_cnt;
274772e66a6SGleb Smirnoff sp->drop_cnt = rp->red_stats.drop_cnt;
275772e66a6SGleb Smirnoff sp->drop_forced = rp->red_stats.drop_forced;
276772e66a6SGleb Smirnoff sp->drop_unforced = rp->red_stats.drop_unforced;
277772e66a6SGleb Smirnoff sp->marked_packets = rp->red_stats.marked_packets;
278772e66a6SGleb Smirnoff }
279772e66a6SGleb Smirnoff
280772e66a6SGleb Smirnoff int
red_addq(red_t * rp,class_queue_t * q,struct mbuf * m,struct altq_pktattr * pktattr)281772e66a6SGleb Smirnoff red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
282772e66a6SGleb Smirnoff struct altq_pktattr *pktattr)
283772e66a6SGleb Smirnoff {
284772e66a6SGleb Smirnoff int avg, droptype;
285772e66a6SGleb Smirnoff int n;
286772e66a6SGleb Smirnoff
287772e66a6SGleb Smirnoff avg = rp->red_avg;
288772e66a6SGleb Smirnoff
289772e66a6SGleb Smirnoff /*
290772e66a6SGleb Smirnoff * if we were idle, we pretend that n packets arrived during
291772e66a6SGleb Smirnoff * the idle period.
292772e66a6SGleb Smirnoff */
293772e66a6SGleb Smirnoff if (rp->red_idle) {
294772e66a6SGleb Smirnoff struct timeval now;
295772e66a6SGleb Smirnoff int t;
296772e66a6SGleb Smirnoff
297772e66a6SGleb Smirnoff rp->red_idle = 0;
298772e66a6SGleb Smirnoff microtime(&now);
299772e66a6SGleb Smirnoff t = (now.tv_sec - rp->red_last.tv_sec);
300772e66a6SGleb Smirnoff if (t > 60) {
301772e66a6SGleb Smirnoff /*
302772e66a6SGleb Smirnoff * being idle for more than 1 minute, set avg to zero.
303772e66a6SGleb Smirnoff * this prevents t from overflow.
304772e66a6SGleb Smirnoff */
305772e66a6SGleb Smirnoff avg = 0;
306772e66a6SGleb Smirnoff } else {
307772e66a6SGleb Smirnoff t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
308772e66a6SGleb Smirnoff n = t / rp->red_pkttime - 1;
309772e66a6SGleb Smirnoff
310772e66a6SGleb Smirnoff /* the following line does (avg = (1 - Wq)^n * avg) */
311772e66a6SGleb Smirnoff if (n > 0)
312772e66a6SGleb Smirnoff avg = (avg >> FP_SHIFT) *
313772e66a6SGleb Smirnoff pow_w(rp->red_wtab, n);
314772e66a6SGleb Smirnoff }
315772e66a6SGleb Smirnoff }
316772e66a6SGleb Smirnoff
317772e66a6SGleb Smirnoff /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
318772e66a6SGleb Smirnoff avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
319772e66a6SGleb Smirnoff rp->red_avg = avg; /* save the new value */
320772e66a6SGleb Smirnoff
321772e66a6SGleb Smirnoff /*
322772e66a6SGleb Smirnoff * red_count keeps a tally of arriving traffic that has not
323772e66a6SGleb Smirnoff * been dropped.
324772e66a6SGleb Smirnoff */
325772e66a6SGleb Smirnoff rp->red_count++;
326772e66a6SGleb Smirnoff
327772e66a6SGleb Smirnoff /* see if we drop early */
328772e66a6SGleb Smirnoff droptype = DTYPE_NODROP;
329772e66a6SGleb Smirnoff if (avg >= rp->red_thmin_s && qlen(q) > 1) {
330772e66a6SGleb Smirnoff if (avg >= rp->red_thmax_s) {
331772e66a6SGleb Smirnoff /* avg >= th_max: forced drop */
332772e66a6SGleb Smirnoff droptype = DTYPE_FORCED;
333772e66a6SGleb Smirnoff } else if (rp->red_old == 0) {
334772e66a6SGleb Smirnoff /* first exceeds th_min */
335772e66a6SGleb Smirnoff rp->red_count = 1;
336772e66a6SGleb Smirnoff rp->red_old = 1;
337772e66a6SGleb Smirnoff } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
338772e66a6SGleb Smirnoff rp->red_probd, rp->red_count)) {
339772e66a6SGleb Smirnoff /* mark or drop by red */
340772e66a6SGleb Smirnoff if ((rp->red_flags & REDF_ECN) &&
341772e66a6SGleb Smirnoff mark_ecn(m, pktattr, rp->red_flags)) {
342772e66a6SGleb Smirnoff /* successfully marked. do not drop. */
343772e66a6SGleb Smirnoff rp->red_count = 0;
344772e66a6SGleb Smirnoff #ifdef RED_STATS
345772e66a6SGleb Smirnoff rp->red_stats.marked_packets++;
346772e66a6SGleb Smirnoff #endif
347772e66a6SGleb Smirnoff } else {
348772e66a6SGleb Smirnoff /* unforced drop by red */
349772e66a6SGleb Smirnoff droptype = DTYPE_EARLY;
350772e66a6SGleb Smirnoff }
351772e66a6SGleb Smirnoff }
352772e66a6SGleb Smirnoff } else {
353772e66a6SGleb Smirnoff /* avg < th_min */
354772e66a6SGleb Smirnoff rp->red_old = 0;
355772e66a6SGleb Smirnoff }
356772e66a6SGleb Smirnoff
357772e66a6SGleb Smirnoff /*
358772e66a6SGleb Smirnoff * if the queue length hits the hard limit, it's a forced drop.
359772e66a6SGleb Smirnoff */
360772e66a6SGleb Smirnoff if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
361772e66a6SGleb Smirnoff droptype = DTYPE_FORCED;
362772e66a6SGleb Smirnoff
363772e66a6SGleb Smirnoff #ifdef RED_RANDOM_DROP
364772e66a6SGleb Smirnoff /* if successful or forced drop, enqueue this packet. */
365772e66a6SGleb Smirnoff if (droptype != DTYPE_EARLY)
366772e66a6SGleb Smirnoff _addq(q, m);
367772e66a6SGleb Smirnoff #else
368772e66a6SGleb Smirnoff /* if successful, enqueue this packet. */
369772e66a6SGleb Smirnoff if (droptype == DTYPE_NODROP)
370772e66a6SGleb Smirnoff _addq(q, m);
371772e66a6SGleb Smirnoff #endif
372772e66a6SGleb Smirnoff if (droptype != DTYPE_NODROP) {
373772e66a6SGleb Smirnoff if (droptype == DTYPE_EARLY) {
374772e66a6SGleb Smirnoff /* drop the incoming packet */
375772e66a6SGleb Smirnoff #ifdef RED_STATS
376772e66a6SGleb Smirnoff rp->red_stats.drop_unforced++;
377772e66a6SGleb Smirnoff #endif
378772e66a6SGleb Smirnoff } else {
379772e66a6SGleb Smirnoff /* forced drop, select a victim packet in the queue. */
380772e66a6SGleb Smirnoff #ifdef RED_RANDOM_DROP
381772e66a6SGleb Smirnoff m = _getq_random(q);
382772e66a6SGleb Smirnoff #endif
383772e66a6SGleb Smirnoff #ifdef RED_STATS
384772e66a6SGleb Smirnoff rp->red_stats.drop_forced++;
385772e66a6SGleb Smirnoff #endif
386772e66a6SGleb Smirnoff }
387772e66a6SGleb Smirnoff #ifdef RED_STATS
388772e66a6SGleb Smirnoff PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
389772e66a6SGleb Smirnoff #endif
390772e66a6SGleb Smirnoff rp->red_count = 0;
391772e66a6SGleb Smirnoff m_freem(m);
392772e66a6SGleb Smirnoff return (-1);
393772e66a6SGleb Smirnoff }
394772e66a6SGleb Smirnoff /* successfully queued */
395772e66a6SGleb Smirnoff #ifdef RED_STATS
396772e66a6SGleb Smirnoff PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
397772e66a6SGleb Smirnoff #endif
398772e66a6SGleb Smirnoff return (0);
399772e66a6SGleb Smirnoff }
400772e66a6SGleb Smirnoff
401772e66a6SGleb Smirnoff /*
402772e66a6SGleb Smirnoff * early-drop probability is calculated as follows:
403772e66a6SGleb Smirnoff * prob = p_max * (avg - th_min) / (th_max - th_min)
404772e66a6SGleb Smirnoff * prob_a = prob / (2 - count*prob)
405772e66a6SGleb Smirnoff * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
406772e66a6SGleb Smirnoff * here prob_a increases as successive undrop count increases.
407772e66a6SGleb Smirnoff * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
408772e66a6SGleb Smirnoff * becomes 1 when (count >= (2 / prob))).
409772e66a6SGleb Smirnoff */
410772e66a6SGleb Smirnoff int
drop_early(int fp_len,int fp_probd,int count)411772e66a6SGleb Smirnoff drop_early(int fp_len, int fp_probd, int count)
412772e66a6SGleb Smirnoff {
413772e66a6SGleb Smirnoff int d; /* denominator of drop-probability */
414772e66a6SGleb Smirnoff
415772e66a6SGleb Smirnoff d = fp_probd - count * fp_len;
416772e66a6SGleb Smirnoff if (d <= 0)
417772e66a6SGleb Smirnoff /* count exceeds the hard limit: drop or mark */
418772e66a6SGleb Smirnoff return (1);
419772e66a6SGleb Smirnoff
420772e66a6SGleb Smirnoff /*
421772e66a6SGleb Smirnoff * now the range of d is [1..600] in fixed-point. (when
422772e66a6SGleb Smirnoff * th_max-th_min=10 and p_max=1/30)
423772e66a6SGleb Smirnoff * drop probability = (avg - TH_MIN) / d
424772e66a6SGleb Smirnoff */
425772e66a6SGleb Smirnoff
426772e66a6SGleb Smirnoff if ((arc4random() % d) < fp_len) {
427772e66a6SGleb Smirnoff /* drop or mark */
428772e66a6SGleb Smirnoff return (1);
429772e66a6SGleb Smirnoff }
430772e66a6SGleb Smirnoff /* no drop/mark */
431772e66a6SGleb Smirnoff return (0);
432772e66a6SGleb Smirnoff }
433772e66a6SGleb Smirnoff
434772e66a6SGleb Smirnoff /*
435772e66a6SGleb Smirnoff * try to mark CE bit to the packet.
436772e66a6SGleb Smirnoff * returns 1 if successfully marked, 0 otherwise.
437772e66a6SGleb Smirnoff */
438772e66a6SGleb Smirnoff int
mark_ecn(struct mbuf * m,struct altq_pktattr * pktattr,int flags)439772e66a6SGleb Smirnoff mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
440772e66a6SGleb Smirnoff {
441772e66a6SGleb Smirnoff struct mbuf *m0;
442772e66a6SGleb Smirnoff struct pf_mtag *at;
443772e66a6SGleb Smirnoff void *hdr;
444772e66a6SGleb Smirnoff
445772e66a6SGleb Smirnoff at = pf_find_mtag(m);
446772e66a6SGleb Smirnoff if (at != NULL) {
447772e66a6SGleb Smirnoff hdr = at->hdr;
448772e66a6SGleb Smirnoff } else
449772e66a6SGleb Smirnoff return (0);
450772e66a6SGleb Smirnoff
451772e66a6SGleb Smirnoff /* verify that pattr_hdr is within the mbuf data */
452772e66a6SGleb Smirnoff for (m0 = m; m0 != NULL; m0 = m0->m_next)
453772e66a6SGleb Smirnoff if (((caddr_t)hdr >= m0->m_data) &&
454772e66a6SGleb Smirnoff ((caddr_t)hdr < m0->m_data + m0->m_len))
455772e66a6SGleb Smirnoff break;
456772e66a6SGleb Smirnoff if (m0 == NULL) {
457772e66a6SGleb Smirnoff /* ick, tag info is stale */
458772e66a6SGleb Smirnoff return (0);
459772e66a6SGleb Smirnoff }
460772e66a6SGleb Smirnoff
461772e66a6SGleb Smirnoff switch (((struct ip *)hdr)->ip_v) {
462772e66a6SGleb Smirnoff case IPVERSION:
463772e66a6SGleb Smirnoff if (flags & REDF_ECN4) {
464772e66a6SGleb Smirnoff struct ip *ip = hdr;
465772e66a6SGleb Smirnoff u_int8_t otos;
466772e66a6SGleb Smirnoff int sum;
467772e66a6SGleb Smirnoff
468772e66a6SGleb Smirnoff if (ip->ip_v != 4)
469772e66a6SGleb Smirnoff return (0); /* version mismatch! */
470772e66a6SGleb Smirnoff
471772e66a6SGleb Smirnoff if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
472772e66a6SGleb Smirnoff return (0); /* not-ECT */
473772e66a6SGleb Smirnoff if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
474772e66a6SGleb Smirnoff return (1); /* already marked */
475772e66a6SGleb Smirnoff
476772e66a6SGleb Smirnoff /*
477772e66a6SGleb Smirnoff * ecn-capable but not marked,
478772e66a6SGleb Smirnoff * mark CE and update checksum
479772e66a6SGleb Smirnoff */
480772e66a6SGleb Smirnoff otos = ip->ip_tos;
481772e66a6SGleb Smirnoff ip->ip_tos |= IPTOS_ECN_CE;
482772e66a6SGleb Smirnoff /*
483772e66a6SGleb Smirnoff * update checksum (from RFC1624)
484772e66a6SGleb Smirnoff * HC' = ~(~HC + ~m + m')
485772e66a6SGleb Smirnoff */
486772e66a6SGleb Smirnoff sum = ~ntohs(ip->ip_sum) & 0xffff;
487772e66a6SGleb Smirnoff sum += (~otos & 0xffff) + ip->ip_tos;
488772e66a6SGleb Smirnoff sum = (sum >> 16) + (sum & 0xffff);
489772e66a6SGleb Smirnoff sum += (sum >> 16); /* add carry */
490772e66a6SGleb Smirnoff ip->ip_sum = htons(~sum & 0xffff);
491772e66a6SGleb Smirnoff return (1);
492772e66a6SGleb Smirnoff }
493772e66a6SGleb Smirnoff break;
494772e66a6SGleb Smirnoff #ifdef INET6
495772e66a6SGleb Smirnoff case (IPV6_VERSION >> 4):
496772e66a6SGleb Smirnoff if (flags & REDF_ECN6) {
497772e66a6SGleb Smirnoff struct ip6_hdr *ip6 = hdr;
498772e66a6SGleb Smirnoff u_int32_t flowlabel;
499772e66a6SGleb Smirnoff
500772e66a6SGleb Smirnoff flowlabel = ntohl(ip6->ip6_flow);
501772e66a6SGleb Smirnoff if ((flowlabel >> 28) != 6)
502772e66a6SGleb Smirnoff return (0); /* version mismatch! */
503772e66a6SGleb Smirnoff if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
504772e66a6SGleb Smirnoff (IPTOS_ECN_NOTECT << 20))
505772e66a6SGleb Smirnoff return (0); /* not-ECT */
506772e66a6SGleb Smirnoff if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
507772e66a6SGleb Smirnoff (IPTOS_ECN_CE << 20))
508772e66a6SGleb Smirnoff return (1); /* already marked */
509772e66a6SGleb Smirnoff /*
510772e66a6SGleb Smirnoff * ecn-capable but not marked, mark CE
511772e66a6SGleb Smirnoff */
512772e66a6SGleb Smirnoff flowlabel |= (IPTOS_ECN_CE << 20);
513772e66a6SGleb Smirnoff ip6->ip6_flow = htonl(flowlabel);
514772e66a6SGleb Smirnoff return (1);
515772e66a6SGleb Smirnoff }
516772e66a6SGleb Smirnoff break;
517772e66a6SGleb Smirnoff #endif /* INET6 */
518772e66a6SGleb Smirnoff }
519772e66a6SGleb Smirnoff
520772e66a6SGleb Smirnoff /* not marked */
521772e66a6SGleb Smirnoff return (0);
522772e66a6SGleb Smirnoff }
523772e66a6SGleb Smirnoff
524772e66a6SGleb Smirnoff struct mbuf *
red_getq(red_t * rp,class_queue_t * q)525*c492eb60SMateusz Guzik red_getq(red_t *rp, class_queue_t *q)
526772e66a6SGleb Smirnoff {
527772e66a6SGleb Smirnoff struct mbuf *m;
528772e66a6SGleb Smirnoff
529772e66a6SGleb Smirnoff if ((m = _getq(q)) == NULL) {
530772e66a6SGleb Smirnoff if (rp->red_idle == 0) {
531772e66a6SGleb Smirnoff rp->red_idle = 1;
532772e66a6SGleb Smirnoff microtime(&rp->red_last);
533772e66a6SGleb Smirnoff }
534772e66a6SGleb Smirnoff return NULL;
535772e66a6SGleb Smirnoff }
536772e66a6SGleb Smirnoff
537772e66a6SGleb Smirnoff rp->red_idle = 0;
538772e66a6SGleb Smirnoff return (m);
539772e66a6SGleb Smirnoff }
540772e66a6SGleb Smirnoff
541772e66a6SGleb Smirnoff /*
542772e66a6SGleb Smirnoff * helper routine to calibrate avg during idle.
543772e66a6SGleb Smirnoff * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
544772e66a6SGleb Smirnoff * here Wq = 1/weight and the code assumes Wq is close to zero.
545772e66a6SGleb Smirnoff *
546772e66a6SGleb Smirnoff * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
547772e66a6SGleb Smirnoff */
548772e66a6SGleb Smirnoff static struct wtab *wtab_list = NULL; /* pointer to wtab list */
549772e66a6SGleb Smirnoff
550772e66a6SGleb Smirnoff struct wtab *
wtab_alloc(int weight)551772e66a6SGleb Smirnoff wtab_alloc(int weight)
552772e66a6SGleb Smirnoff {
553772e66a6SGleb Smirnoff struct wtab *w;
554772e66a6SGleb Smirnoff int i;
555772e66a6SGleb Smirnoff
556772e66a6SGleb Smirnoff for (w = wtab_list; w != NULL; w = w->w_next)
557772e66a6SGleb Smirnoff if (w->w_weight == weight) {
558772e66a6SGleb Smirnoff w->w_refcount++;
559772e66a6SGleb Smirnoff return (w);
560772e66a6SGleb Smirnoff }
561772e66a6SGleb Smirnoff
562772e66a6SGleb Smirnoff w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
563772e66a6SGleb Smirnoff if (w == NULL)
564772e66a6SGleb Smirnoff return (NULL);
565772e66a6SGleb Smirnoff w->w_weight = weight;
566772e66a6SGleb Smirnoff w->w_refcount = 1;
567772e66a6SGleb Smirnoff w->w_next = wtab_list;
568772e66a6SGleb Smirnoff wtab_list = w;
569772e66a6SGleb Smirnoff
570772e66a6SGleb Smirnoff /* initialize the weight table */
571772e66a6SGleb Smirnoff w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
572772e66a6SGleb Smirnoff for (i = 1; i < 32; i++) {
573772e66a6SGleb Smirnoff w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
574772e66a6SGleb Smirnoff if (w->w_tab[i] == 0 && w->w_param_max == 0)
575772e66a6SGleb Smirnoff w->w_param_max = 1 << i;
576772e66a6SGleb Smirnoff }
577772e66a6SGleb Smirnoff
578772e66a6SGleb Smirnoff return (w);
579772e66a6SGleb Smirnoff }
580772e66a6SGleb Smirnoff
581772e66a6SGleb Smirnoff int
wtab_destroy(struct wtab * w)582772e66a6SGleb Smirnoff wtab_destroy(struct wtab *w)
583772e66a6SGleb Smirnoff {
584772e66a6SGleb Smirnoff struct wtab *prev;
585772e66a6SGleb Smirnoff
586772e66a6SGleb Smirnoff if (--w->w_refcount > 0)
587772e66a6SGleb Smirnoff return (0);
588772e66a6SGleb Smirnoff
589772e66a6SGleb Smirnoff if (wtab_list == w)
590772e66a6SGleb Smirnoff wtab_list = w->w_next;
591772e66a6SGleb Smirnoff else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
592772e66a6SGleb Smirnoff if (prev->w_next == w) {
593772e66a6SGleb Smirnoff prev->w_next = w->w_next;
594772e66a6SGleb Smirnoff break;
595772e66a6SGleb Smirnoff }
596772e66a6SGleb Smirnoff
597772e66a6SGleb Smirnoff free(w, M_DEVBUF);
598772e66a6SGleb Smirnoff return (0);
599772e66a6SGleb Smirnoff }
600772e66a6SGleb Smirnoff
601772e66a6SGleb Smirnoff int32_t
pow_w(struct wtab * w,int n)602772e66a6SGleb Smirnoff pow_w(struct wtab *w, int n)
603772e66a6SGleb Smirnoff {
604772e66a6SGleb Smirnoff int i, bit;
605772e66a6SGleb Smirnoff int32_t val;
606772e66a6SGleb Smirnoff
607772e66a6SGleb Smirnoff if (n >= w->w_param_max)
608772e66a6SGleb Smirnoff return (0);
609772e66a6SGleb Smirnoff
610772e66a6SGleb Smirnoff val = 1 << FP_SHIFT;
611772e66a6SGleb Smirnoff if (n <= 0)
612772e66a6SGleb Smirnoff return (val);
613772e66a6SGleb Smirnoff
614772e66a6SGleb Smirnoff bit = 1;
615772e66a6SGleb Smirnoff i = 0;
616772e66a6SGleb Smirnoff while (n) {
617772e66a6SGleb Smirnoff if (n & bit) {
618772e66a6SGleb Smirnoff val = (val * w->w_tab[i]) >> FP_SHIFT;
619772e66a6SGleb Smirnoff n &= ~bit;
620772e66a6SGleb Smirnoff }
621772e66a6SGleb Smirnoff i++;
622772e66a6SGleb Smirnoff bit <<= 1;
623772e66a6SGleb Smirnoff }
624772e66a6SGleb Smirnoff return (val);
625772e66a6SGleb Smirnoff }
626772e66a6SGleb Smirnoff
627772e66a6SGleb Smirnoff #endif /* ALTQ_RED */
628