xref: /freebsd/sys/net/altq/altq_red.c (revision 2ff63af9b88c7413b7d71715b5532625752a248e)
1da8ae05dSGleb Smirnoff /*-
2772e66a6SGleb Smirnoff  * Copyright (C) 1997-2003
3772e66a6SGleb Smirnoff  *	Sony Computer Science Laboratories Inc.  All rights reserved.
4772e66a6SGleb Smirnoff  *
5772e66a6SGleb Smirnoff  * Redistribution and use in source and binary forms, with or without
6772e66a6SGleb Smirnoff  * modification, are permitted provided that the following conditions
7772e66a6SGleb Smirnoff  * are met:
8772e66a6SGleb Smirnoff  * 1. Redistributions of source code must retain the above copyright
9772e66a6SGleb Smirnoff  *    notice, this list of conditions and the following disclaimer.
10772e66a6SGleb Smirnoff  * 2. Redistributions in binary form must reproduce the above copyright
11772e66a6SGleb Smirnoff  *    notice, this list of conditions and the following disclaimer in the
12772e66a6SGleb Smirnoff  *    documentation and/or other materials provided with the distribution.
13772e66a6SGleb Smirnoff  *
14772e66a6SGleb Smirnoff  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
15772e66a6SGleb Smirnoff  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16772e66a6SGleb Smirnoff  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17772e66a6SGleb Smirnoff  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
18772e66a6SGleb Smirnoff  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19772e66a6SGleb Smirnoff  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20772e66a6SGleb Smirnoff  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21772e66a6SGleb Smirnoff  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22772e66a6SGleb Smirnoff  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23772e66a6SGleb Smirnoff  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24772e66a6SGleb Smirnoff  * SUCH DAMAGE.
25772e66a6SGleb Smirnoff  *
26772e66a6SGleb Smirnoff  */
27da8ae05dSGleb Smirnoff /*-
28772e66a6SGleb Smirnoff  * Copyright (c) 1990-1994 Regents of the University of California.
29772e66a6SGleb Smirnoff  * All rights reserved.
30772e66a6SGleb Smirnoff  *
31772e66a6SGleb Smirnoff  * Redistribution and use in source and binary forms, with or without
32772e66a6SGleb Smirnoff  * modification, are permitted provided that the following conditions
33772e66a6SGleb Smirnoff  * are met:
34772e66a6SGleb Smirnoff  * 1. Redistributions of source code must retain the above copyright
35772e66a6SGleb Smirnoff  *    notice, this list of conditions and the following disclaimer.
36772e66a6SGleb Smirnoff  * 2. Redistributions in binary form must reproduce the above copyright
37772e66a6SGleb Smirnoff  *    notice, this list of conditions and the following disclaimer in the
38772e66a6SGleb Smirnoff  *    documentation and/or other materials provided with the distribution.
39772e66a6SGleb Smirnoff  * 3. All advertising materials mentioning features or use of this software
40772e66a6SGleb Smirnoff  *    must display the following acknowledgement:
41772e66a6SGleb Smirnoff  *	This product includes software developed by the Computer Systems
42772e66a6SGleb Smirnoff  *	Engineering Group at Lawrence Berkeley Laboratory.
43772e66a6SGleb Smirnoff  * 4. Neither the name of the University nor of the Laboratory may be used
44772e66a6SGleb Smirnoff  *    to endorse or promote products derived from this software without
45772e66a6SGleb Smirnoff  *    specific prior written permission.
46772e66a6SGleb Smirnoff  *
47772e66a6SGleb Smirnoff  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48772e66a6SGleb Smirnoff  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49772e66a6SGleb Smirnoff  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50772e66a6SGleb Smirnoff  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51772e66a6SGleb Smirnoff  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52772e66a6SGleb Smirnoff  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53772e66a6SGleb Smirnoff  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54772e66a6SGleb Smirnoff  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55772e66a6SGleb Smirnoff  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56772e66a6SGleb Smirnoff  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57772e66a6SGleb Smirnoff  * SUCH DAMAGE.
58da8ae05dSGleb Smirnoff  *
59da8ae05dSGleb Smirnoff  * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $
60772e66a6SGleb Smirnoff  */
61772e66a6SGleb Smirnoff 
62772e66a6SGleb Smirnoff #include "opt_altq.h"
63772e66a6SGleb Smirnoff #include "opt_inet.h"
64772e66a6SGleb Smirnoff #include "opt_inet6.h"
65772e66a6SGleb Smirnoff #ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
66772e66a6SGleb Smirnoff 
67772e66a6SGleb Smirnoff #include <sys/param.h>
68772e66a6SGleb Smirnoff #include <sys/malloc.h>
69772e66a6SGleb Smirnoff #include <sys/mbuf.h>
70772e66a6SGleb Smirnoff #include <sys/socket.h>
71772e66a6SGleb Smirnoff #include <sys/systm.h>
72772e66a6SGleb Smirnoff #include <sys/errno.h>
73772e66a6SGleb Smirnoff #if 1 /* ALTQ3_COMPAT */
74772e66a6SGleb Smirnoff #include <sys/sockio.h>
75772e66a6SGleb Smirnoff #include <sys/proc.h>
76772e66a6SGleb Smirnoff #include <sys/kernel.h>
77772e66a6SGleb Smirnoff #ifdef ALTQ_FLOWVALVE
78772e66a6SGleb Smirnoff #include <sys/queue.h>
79772e66a6SGleb Smirnoff #include <sys/time.h>
80772e66a6SGleb Smirnoff #endif
81772e66a6SGleb Smirnoff #endif /* ALTQ3_COMPAT */
82772e66a6SGleb Smirnoff 
83772e66a6SGleb Smirnoff #include <net/if.h>
84772e66a6SGleb Smirnoff #include <net/if_var.h>
85772e66a6SGleb Smirnoff 
86772e66a6SGleb Smirnoff #include <netinet/in.h>
87772e66a6SGleb Smirnoff #include <netinet/in_systm.h>
88772e66a6SGleb Smirnoff #include <netinet/ip.h>
89772e66a6SGleb Smirnoff #ifdef INET6
90772e66a6SGleb Smirnoff #include <netinet/ip6.h>
91772e66a6SGleb Smirnoff #endif
92772e66a6SGleb Smirnoff 
93772e66a6SGleb Smirnoff #include <netpfil/pf/pf.h>
94772e66a6SGleb Smirnoff #include <netpfil/pf/pf_altq.h>
95772e66a6SGleb Smirnoff #include <netpfil/pf/pf_mtag.h>
96772e66a6SGleb Smirnoff #include <net/altq/altq.h>
97772e66a6SGleb Smirnoff #include <net/altq/altq_red.h>
98772e66a6SGleb Smirnoff 
99772e66a6SGleb Smirnoff /*
100772e66a6SGleb Smirnoff  * ALTQ/RED (Random Early Detection) implementation using 32-bit
101772e66a6SGleb Smirnoff  * fixed-point calculation.
102772e66a6SGleb Smirnoff  *
103772e66a6SGleb Smirnoff  * written by kjc using the ns code as a reference.
104772e66a6SGleb Smirnoff  * you can learn more about red and ns from Sally's home page at
105772e66a6SGleb Smirnoff  * http://www-nrg.ee.lbl.gov/floyd/
106772e66a6SGleb Smirnoff  *
107772e66a6SGleb Smirnoff  * most of the red parameter values are fixed in this implementation
108772e66a6SGleb Smirnoff  * to prevent fixed-point overflow/underflow.
109772e66a6SGleb Smirnoff  * if you change the parameters, watch out for overflow/underflow!
110772e66a6SGleb Smirnoff  *
111772e66a6SGleb Smirnoff  * the parameters used are recommended values by Sally.
112772e66a6SGleb Smirnoff  * the corresponding ns config looks:
113772e66a6SGleb Smirnoff  *	q_weight=0.00195
114772e66a6SGleb Smirnoff  *	minthresh=5 maxthresh=15 queue-size=60
115772e66a6SGleb Smirnoff  *	linterm=30
116772e66a6SGleb Smirnoff  *	dropmech=drop-tail
117772e66a6SGleb Smirnoff  *	bytes=false (can't be handled by 32-bit fixed-point)
118772e66a6SGleb Smirnoff  *	doubleq=false dqthresh=false
119772e66a6SGleb Smirnoff  *	wait=true
120772e66a6SGleb Smirnoff  */
121772e66a6SGleb Smirnoff /*
122772e66a6SGleb Smirnoff  * alternative red parameters for a slow link.
123772e66a6SGleb Smirnoff  *
124772e66a6SGleb Smirnoff  * assume the queue length becomes from zero to L and keeps L, it takes
125772e66a6SGleb Smirnoff  * N packets for q_avg to reach 63% of L.
126772e66a6SGleb Smirnoff  * when q_weight is 0.002, N is about 500 packets.
127772e66a6SGleb Smirnoff  * for a slow link like dial-up, 500 packets takes more than 1 minute!
128772e66a6SGleb Smirnoff  * when q_weight is 0.008, N is about 127 packets.
129772e66a6SGleb Smirnoff  * when q_weight is 0.016, N is about 63 packets.
130772e66a6SGleb Smirnoff  * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
131772e66a6SGleb Smirnoff  * are allowed for 0.016.
132772e66a6SGleb Smirnoff  * see Sally's paper for more details.
133772e66a6SGleb Smirnoff  */
134772e66a6SGleb Smirnoff /* normal red parameters */
135772e66a6SGleb Smirnoff #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
136772e66a6SGleb Smirnoff 				/* q_weight = 0.00195 */
137772e66a6SGleb Smirnoff 
138772e66a6SGleb Smirnoff /* red parameters for a slow link */
139772e66a6SGleb Smirnoff #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
140772e66a6SGleb Smirnoff 				/* q_weight = 0.0078125 */
141772e66a6SGleb Smirnoff 
142772e66a6SGleb Smirnoff /* red parameters for a very slow link (e.g., dialup) */
143772e66a6SGleb Smirnoff #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
144772e66a6SGleb Smirnoff 				/* q_weight = 0.015625 */
145772e66a6SGleb Smirnoff 
146772e66a6SGleb Smirnoff /* fixed-point uses 12-bit decimal places */
147772e66a6SGleb Smirnoff #define	FP_SHIFT	12	/* fixed-point shift */
148772e66a6SGleb Smirnoff 
149772e66a6SGleb Smirnoff /* red parameters for drop probability */
150772e66a6SGleb Smirnoff #define	INV_P_MAX	10	/* inverse of max drop probability */
151772e66a6SGleb Smirnoff #define	TH_MIN		5	/* min threshold */
152772e66a6SGleb Smirnoff #define	TH_MAX		15	/* max threshold */
153772e66a6SGleb Smirnoff 
154a4641f4eSPedro F. Giffuni #define	RED_LIMIT	60	/* default max queue length */
155772e66a6SGleb Smirnoff #define	RED_STATS		/* collect statistics */
156772e66a6SGleb Smirnoff 
157772e66a6SGleb Smirnoff /*
158772e66a6SGleb Smirnoff  * our default policy for forced-drop is drop-tail.
159772e66a6SGleb Smirnoff  * (in altq-1.1.2 or earlier, the default was random-drop.
160772e66a6SGleb Smirnoff  * but it makes more sense to punish the cause of the surge.)
161772e66a6SGleb Smirnoff  * to switch to the random-drop policy, define "RED_RANDOM_DROP".
162772e66a6SGleb Smirnoff  */
163772e66a6SGleb Smirnoff 
164772e66a6SGleb Smirnoff /* default red parameter values */
165772e66a6SGleb Smirnoff static int default_th_min = TH_MIN;
166772e66a6SGleb Smirnoff static int default_th_max = TH_MAX;
167772e66a6SGleb Smirnoff static int default_inv_pmax = INV_P_MAX;
168772e66a6SGleb Smirnoff 
169772e66a6SGleb Smirnoff /*
170772e66a6SGleb Smirnoff  * red support routines
171772e66a6SGleb Smirnoff  */
172772e66a6SGleb Smirnoff red_t *
red_alloc(int weight,int inv_pmax,int th_min,int th_max,int flags,int pkttime)173772e66a6SGleb Smirnoff red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
174772e66a6SGleb Smirnoff    int pkttime)
175772e66a6SGleb Smirnoff {
176772e66a6SGleb Smirnoff 	red_t	*rp;
177772e66a6SGleb Smirnoff 	int	 w, i;
178772e66a6SGleb Smirnoff 	int	 npkts_per_sec;
179772e66a6SGleb Smirnoff 
180772e66a6SGleb Smirnoff 	rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
181772e66a6SGleb Smirnoff 	if (rp == NULL)
182772e66a6SGleb Smirnoff 		return (NULL);
183772e66a6SGleb Smirnoff 
184772e66a6SGleb Smirnoff 	if (weight == 0)
185772e66a6SGleb Smirnoff 		rp->red_weight = W_WEIGHT;
186772e66a6SGleb Smirnoff 	else
187772e66a6SGleb Smirnoff 		rp->red_weight = weight;
188772e66a6SGleb Smirnoff 
189772e66a6SGleb Smirnoff 	/* allocate weight table */
190772e66a6SGleb Smirnoff 	rp->red_wtab = wtab_alloc(rp->red_weight);
191772e66a6SGleb Smirnoff 	if (rp->red_wtab == NULL) {
192772e66a6SGleb Smirnoff 		free(rp, M_DEVBUF);
193772e66a6SGleb Smirnoff 		return (NULL);
194772e66a6SGleb Smirnoff 	}
195772e66a6SGleb Smirnoff 
196772e66a6SGleb Smirnoff 	rp->red_avg = 0;
197772e66a6SGleb Smirnoff 	rp->red_idle = 1;
198772e66a6SGleb Smirnoff 
199772e66a6SGleb Smirnoff 	if (inv_pmax == 0)
200772e66a6SGleb Smirnoff 		rp->red_inv_pmax = default_inv_pmax;
201772e66a6SGleb Smirnoff 	else
202772e66a6SGleb Smirnoff 		rp->red_inv_pmax = inv_pmax;
203772e66a6SGleb Smirnoff 	if (th_min == 0)
204772e66a6SGleb Smirnoff 		rp->red_thmin = default_th_min;
205772e66a6SGleb Smirnoff 	else
206772e66a6SGleb Smirnoff 		rp->red_thmin = th_min;
207772e66a6SGleb Smirnoff 	if (th_max == 0)
208772e66a6SGleb Smirnoff 		rp->red_thmax = default_th_max;
209772e66a6SGleb Smirnoff 	else
210772e66a6SGleb Smirnoff 		rp->red_thmax = th_max;
211772e66a6SGleb Smirnoff 
212772e66a6SGleb Smirnoff 	rp->red_flags = flags;
213772e66a6SGleb Smirnoff 
214772e66a6SGleb Smirnoff 	if (pkttime == 0)
215772e66a6SGleb Smirnoff 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
216772e66a6SGleb Smirnoff 		rp->red_pkttime = 800;
217772e66a6SGleb Smirnoff 	else
218772e66a6SGleb Smirnoff 		rp->red_pkttime = pkttime;
219772e66a6SGleb Smirnoff 
220772e66a6SGleb Smirnoff 	if (weight == 0) {
221772e66a6SGleb Smirnoff 		/* when the link is very slow, adjust red parameters */
222772e66a6SGleb Smirnoff 		npkts_per_sec = 1000000 / rp->red_pkttime;
223772e66a6SGleb Smirnoff 		if (npkts_per_sec < 50) {
224772e66a6SGleb Smirnoff 			/* up to about 400Kbps */
225772e66a6SGleb Smirnoff 			rp->red_weight = W_WEIGHT_2;
226772e66a6SGleb Smirnoff 		} else if (npkts_per_sec < 300) {
227772e66a6SGleb Smirnoff 			/* up to about 2.4Mbps */
228772e66a6SGleb Smirnoff 			rp->red_weight = W_WEIGHT_1;
229772e66a6SGleb Smirnoff 		}
230772e66a6SGleb Smirnoff 	}
231772e66a6SGleb Smirnoff 
232772e66a6SGleb Smirnoff 	/* calculate wshift.  weight must be power of 2 */
233772e66a6SGleb Smirnoff 	w = rp->red_weight;
234772e66a6SGleb Smirnoff 	for (i = 0; w > 1; i++)
235772e66a6SGleb Smirnoff 		w = w >> 1;
236772e66a6SGleb Smirnoff 	rp->red_wshift = i;
237772e66a6SGleb Smirnoff 	w = 1 << rp->red_wshift;
238772e66a6SGleb Smirnoff 	if (w != rp->red_weight) {
239772e66a6SGleb Smirnoff 		printf("invalid weight value %d for red! use %d\n",
240772e66a6SGleb Smirnoff 		       rp->red_weight, w);
241772e66a6SGleb Smirnoff 		rp->red_weight = w;
242772e66a6SGleb Smirnoff 	}
243772e66a6SGleb Smirnoff 
244772e66a6SGleb Smirnoff 	/*
245772e66a6SGleb Smirnoff 	 * thmin_s and thmax_s are scaled versions of th_min and th_max
246772e66a6SGleb Smirnoff 	 * to be compared with avg.
247772e66a6SGleb Smirnoff 	 */
248772e66a6SGleb Smirnoff 	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
249772e66a6SGleb Smirnoff 	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
250772e66a6SGleb Smirnoff 
251772e66a6SGleb Smirnoff 	/*
252772e66a6SGleb Smirnoff 	 * precompute probability denominator
253772e66a6SGleb Smirnoff 	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
254772e66a6SGleb Smirnoff 	 */
255772e66a6SGleb Smirnoff 	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
256772e66a6SGleb Smirnoff 			 * rp->red_inv_pmax) << FP_SHIFT;
257772e66a6SGleb Smirnoff 
258772e66a6SGleb Smirnoff 	microtime(&rp->red_last);
259772e66a6SGleb Smirnoff 	return (rp);
260772e66a6SGleb Smirnoff }
261772e66a6SGleb Smirnoff 
262772e66a6SGleb Smirnoff void
red_destroy(red_t * rp)263772e66a6SGleb Smirnoff red_destroy(red_t *rp)
264772e66a6SGleb Smirnoff {
265772e66a6SGleb Smirnoff 	wtab_destroy(rp->red_wtab);
266772e66a6SGleb Smirnoff 	free(rp, M_DEVBUF);
267772e66a6SGleb Smirnoff }
268772e66a6SGleb Smirnoff 
269772e66a6SGleb Smirnoff void
red_getstats(red_t * rp,struct redstats * sp)270772e66a6SGleb Smirnoff red_getstats(red_t *rp, struct redstats *sp)
271772e66a6SGleb Smirnoff {
272772e66a6SGleb Smirnoff 	sp->q_avg		= rp->red_avg >> rp->red_wshift;
273772e66a6SGleb Smirnoff 	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
274772e66a6SGleb Smirnoff 	sp->drop_cnt		= rp->red_stats.drop_cnt;
275772e66a6SGleb Smirnoff 	sp->drop_forced		= rp->red_stats.drop_forced;
276772e66a6SGleb Smirnoff 	sp->drop_unforced	= rp->red_stats.drop_unforced;
277772e66a6SGleb Smirnoff 	sp->marked_packets	= rp->red_stats.marked_packets;
278772e66a6SGleb Smirnoff }
279772e66a6SGleb Smirnoff 
280772e66a6SGleb Smirnoff int
red_addq(red_t * rp,class_queue_t * q,struct mbuf * m,struct altq_pktattr * pktattr)281772e66a6SGleb Smirnoff red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
282772e66a6SGleb Smirnoff     struct altq_pktattr *pktattr)
283772e66a6SGleb Smirnoff {
284772e66a6SGleb Smirnoff 	int avg, droptype;
285772e66a6SGleb Smirnoff 	int n;
286772e66a6SGleb Smirnoff 
287772e66a6SGleb Smirnoff 	avg = rp->red_avg;
288772e66a6SGleb Smirnoff 
289772e66a6SGleb Smirnoff 	/*
290772e66a6SGleb Smirnoff 	 * if we were idle, we pretend that n packets arrived during
291772e66a6SGleb Smirnoff 	 * the idle period.
292772e66a6SGleb Smirnoff 	 */
293772e66a6SGleb Smirnoff 	if (rp->red_idle) {
294772e66a6SGleb Smirnoff 		struct timeval now;
295772e66a6SGleb Smirnoff 		int t;
296772e66a6SGleb Smirnoff 
297772e66a6SGleb Smirnoff 		rp->red_idle = 0;
298772e66a6SGleb Smirnoff 		microtime(&now);
299772e66a6SGleb Smirnoff 		t = (now.tv_sec - rp->red_last.tv_sec);
300772e66a6SGleb Smirnoff 		if (t > 60) {
301772e66a6SGleb Smirnoff 			/*
302772e66a6SGleb Smirnoff 			 * being idle for more than 1 minute, set avg to zero.
303772e66a6SGleb Smirnoff 			 * this prevents t from overflow.
304772e66a6SGleb Smirnoff 			 */
305772e66a6SGleb Smirnoff 			avg = 0;
306772e66a6SGleb Smirnoff 		} else {
307772e66a6SGleb Smirnoff 			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
308772e66a6SGleb Smirnoff 			n = t / rp->red_pkttime - 1;
309772e66a6SGleb Smirnoff 
310772e66a6SGleb Smirnoff 			/* the following line does (avg = (1 - Wq)^n * avg) */
311772e66a6SGleb Smirnoff 			if (n > 0)
312772e66a6SGleb Smirnoff 				avg = (avg >> FP_SHIFT) *
313772e66a6SGleb Smirnoff 				    pow_w(rp->red_wtab, n);
314772e66a6SGleb Smirnoff 		}
315772e66a6SGleb Smirnoff 	}
316772e66a6SGleb Smirnoff 
317772e66a6SGleb Smirnoff 	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
318772e66a6SGleb Smirnoff 	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
319772e66a6SGleb Smirnoff 	rp->red_avg = avg;		/* save the new value */
320772e66a6SGleb Smirnoff 
321772e66a6SGleb Smirnoff 	/*
322772e66a6SGleb Smirnoff 	 * red_count keeps a tally of arriving traffic that has not
323772e66a6SGleb Smirnoff 	 * been dropped.
324772e66a6SGleb Smirnoff 	 */
325772e66a6SGleb Smirnoff 	rp->red_count++;
326772e66a6SGleb Smirnoff 
327772e66a6SGleb Smirnoff 	/* see if we drop early */
328772e66a6SGleb Smirnoff 	droptype = DTYPE_NODROP;
329772e66a6SGleb Smirnoff 	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
330772e66a6SGleb Smirnoff 		if (avg >= rp->red_thmax_s) {
331772e66a6SGleb Smirnoff 			/* avg >= th_max: forced drop */
332772e66a6SGleb Smirnoff 			droptype = DTYPE_FORCED;
333772e66a6SGleb Smirnoff 		} else if (rp->red_old == 0) {
334772e66a6SGleb Smirnoff 			/* first exceeds th_min */
335772e66a6SGleb Smirnoff 			rp->red_count = 1;
336772e66a6SGleb Smirnoff 			rp->red_old = 1;
337772e66a6SGleb Smirnoff 		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
338772e66a6SGleb Smirnoff 				      rp->red_probd, rp->red_count)) {
339772e66a6SGleb Smirnoff 			/* mark or drop by red */
340772e66a6SGleb Smirnoff 			if ((rp->red_flags & REDF_ECN) &&
341772e66a6SGleb Smirnoff 			    mark_ecn(m, pktattr, rp->red_flags)) {
342772e66a6SGleb Smirnoff 				/* successfully marked.  do not drop. */
343772e66a6SGleb Smirnoff 				rp->red_count = 0;
344772e66a6SGleb Smirnoff #ifdef RED_STATS
345772e66a6SGleb Smirnoff 				rp->red_stats.marked_packets++;
346772e66a6SGleb Smirnoff #endif
347772e66a6SGleb Smirnoff 			} else {
348772e66a6SGleb Smirnoff 				/* unforced drop by red */
349772e66a6SGleb Smirnoff 				droptype = DTYPE_EARLY;
350772e66a6SGleb Smirnoff 			}
351772e66a6SGleb Smirnoff 		}
352772e66a6SGleb Smirnoff 	} else {
353772e66a6SGleb Smirnoff 		/* avg < th_min */
354772e66a6SGleb Smirnoff 		rp->red_old = 0;
355772e66a6SGleb Smirnoff 	}
356772e66a6SGleb Smirnoff 
357772e66a6SGleb Smirnoff 	/*
358772e66a6SGleb Smirnoff 	 * if the queue length hits the hard limit, it's a forced drop.
359772e66a6SGleb Smirnoff 	 */
360772e66a6SGleb Smirnoff 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
361772e66a6SGleb Smirnoff 		droptype = DTYPE_FORCED;
362772e66a6SGleb Smirnoff 
363772e66a6SGleb Smirnoff #ifdef RED_RANDOM_DROP
364772e66a6SGleb Smirnoff 	/* if successful or forced drop, enqueue this packet. */
365772e66a6SGleb Smirnoff 	if (droptype != DTYPE_EARLY)
366772e66a6SGleb Smirnoff 		_addq(q, m);
367772e66a6SGleb Smirnoff #else
368772e66a6SGleb Smirnoff 	/* if successful, enqueue this packet. */
369772e66a6SGleb Smirnoff 	if (droptype == DTYPE_NODROP)
370772e66a6SGleb Smirnoff 		_addq(q, m);
371772e66a6SGleb Smirnoff #endif
372772e66a6SGleb Smirnoff 	if (droptype != DTYPE_NODROP) {
373772e66a6SGleb Smirnoff 		if (droptype == DTYPE_EARLY) {
374772e66a6SGleb Smirnoff 			/* drop the incoming packet */
375772e66a6SGleb Smirnoff #ifdef RED_STATS
376772e66a6SGleb Smirnoff 			rp->red_stats.drop_unforced++;
377772e66a6SGleb Smirnoff #endif
378772e66a6SGleb Smirnoff 		} else {
379772e66a6SGleb Smirnoff 			/* forced drop, select a victim packet in the queue. */
380772e66a6SGleb Smirnoff #ifdef RED_RANDOM_DROP
381772e66a6SGleb Smirnoff 			m = _getq_random(q);
382772e66a6SGleb Smirnoff #endif
383772e66a6SGleb Smirnoff #ifdef RED_STATS
384772e66a6SGleb Smirnoff 			rp->red_stats.drop_forced++;
385772e66a6SGleb Smirnoff #endif
386772e66a6SGleb Smirnoff 		}
387772e66a6SGleb Smirnoff #ifdef RED_STATS
388772e66a6SGleb Smirnoff 		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
389772e66a6SGleb Smirnoff #endif
390772e66a6SGleb Smirnoff 		rp->red_count = 0;
391772e66a6SGleb Smirnoff 		m_freem(m);
392772e66a6SGleb Smirnoff 		return (-1);
393772e66a6SGleb Smirnoff 	}
394772e66a6SGleb Smirnoff 	/* successfully queued */
395772e66a6SGleb Smirnoff #ifdef RED_STATS
396772e66a6SGleb Smirnoff 	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
397772e66a6SGleb Smirnoff #endif
398772e66a6SGleb Smirnoff 	return (0);
399772e66a6SGleb Smirnoff }
400772e66a6SGleb Smirnoff 
401772e66a6SGleb Smirnoff /*
402772e66a6SGleb Smirnoff  * early-drop probability is calculated as follows:
403772e66a6SGleb Smirnoff  *   prob = p_max * (avg - th_min) / (th_max - th_min)
404772e66a6SGleb Smirnoff  *   prob_a = prob / (2 - count*prob)
405772e66a6SGleb Smirnoff  *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
406772e66a6SGleb Smirnoff  * here prob_a increases as successive undrop count increases.
407772e66a6SGleb Smirnoff  * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
408772e66a6SGleb Smirnoff  * becomes 1 when (count >= (2 / prob))).
409772e66a6SGleb Smirnoff  */
410772e66a6SGleb Smirnoff int
drop_early(int fp_len,int fp_probd,int count)411772e66a6SGleb Smirnoff drop_early(int fp_len, int fp_probd, int count)
412772e66a6SGleb Smirnoff {
413772e66a6SGleb Smirnoff 	int	d;		/* denominator of drop-probability */
414772e66a6SGleb Smirnoff 
415772e66a6SGleb Smirnoff 	d = fp_probd - count * fp_len;
416772e66a6SGleb Smirnoff 	if (d <= 0)
417772e66a6SGleb Smirnoff 		/* count exceeds the hard limit: drop or mark */
418772e66a6SGleb Smirnoff 		return (1);
419772e66a6SGleb Smirnoff 
420772e66a6SGleb Smirnoff 	/*
421772e66a6SGleb Smirnoff 	 * now the range of d is [1..600] in fixed-point. (when
422772e66a6SGleb Smirnoff 	 * th_max-th_min=10 and p_max=1/30)
423772e66a6SGleb Smirnoff 	 * drop probability = (avg - TH_MIN) / d
424772e66a6SGleb Smirnoff 	 */
425772e66a6SGleb Smirnoff 
426772e66a6SGleb Smirnoff 	if ((arc4random() % d) < fp_len) {
427772e66a6SGleb Smirnoff 		/* drop or mark */
428772e66a6SGleb Smirnoff 		return (1);
429772e66a6SGleb Smirnoff 	}
430772e66a6SGleb Smirnoff 	/* no drop/mark */
431772e66a6SGleb Smirnoff 	return (0);
432772e66a6SGleb Smirnoff }
433772e66a6SGleb Smirnoff 
434772e66a6SGleb Smirnoff /*
435772e66a6SGleb Smirnoff  * try to mark CE bit to the packet.
436772e66a6SGleb Smirnoff  *    returns 1 if successfully marked, 0 otherwise.
437772e66a6SGleb Smirnoff  */
438772e66a6SGleb Smirnoff int
mark_ecn(struct mbuf * m,struct altq_pktattr * pktattr,int flags)439772e66a6SGleb Smirnoff mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
440772e66a6SGleb Smirnoff {
441772e66a6SGleb Smirnoff 	struct mbuf	*m0;
442772e66a6SGleb Smirnoff 	struct pf_mtag	*at;
443772e66a6SGleb Smirnoff 	void		*hdr;
444772e66a6SGleb Smirnoff 
445772e66a6SGleb Smirnoff 	at = pf_find_mtag(m);
446772e66a6SGleb Smirnoff 	if (at != NULL) {
447772e66a6SGleb Smirnoff 		hdr = at->hdr;
448772e66a6SGleb Smirnoff 	} else
449772e66a6SGleb Smirnoff 		return (0);
450772e66a6SGleb Smirnoff 
451772e66a6SGleb Smirnoff 	/* verify that pattr_hdr is within the mbuf data */
452772e66a6SGleb Smirnoff 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
453772e66a6SGleb Smirnoff 		if (((caddr_t)hdr >= m0->m_data) &&
454772e66a6SGleb Smirnoff 		    ((caddr_t)hdr < m0->m_data + m0->m_len))
455772e66a6SGleb Smirnoff 			break;
456772e66a6SGleb Smirnoff 	if (m0 == NULL) {
457772e66a6SGleb Smirnoff 		/* ick, tag info is stale */
458772e66a6SGleb Smirnoff 		return (0);
459772e66a6SGleb Smirnoff 	}
460772e66a6SGleb Smirnoff 
461772e66a6SGleb Smirnoff 	switch (((struct ip *)hdr)->ip_v) {
462772e66a6SGleb Smirnoff 	case IPVERSION:
463772e66a6SGleb Smirnoff 		if (flags & REDF_ECN4) {
464772e66a6SGleb Smirnoff 			struct ip *ip = hdr;
465772e66a6SGleb Smirnoff 			u_int8_t otos;
466772e66a6SGleb Smirnoff 			int sum;
467772e66a6SGleb Smirnoff 
468772e66a6SGleb Smirnoff 			if (ip->ip_v != 4)
469772e66a6SGleb Smirnoff 				return (0);	/* version mismatch! */
470772e66a6SGleb Smirnoff 
471772e66a6SGleb Smirnoff 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
472772e66a6SGleb Smirnoff 				return (0);	/* not-ECT */
473772e66a6SGleb Smirnoff 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
474772e66a6SGleb Smirnoff 				return (1);	/* already marked */
475772e66a6SGleb Smirnoff 
476772e66a6SGleb Smirnoff 			/*
477772e66a6SGleb Smirnoff 			 * ecn-capable but not marked,
478772e66a6SGleb Smirnoff 			 * mark CE and update checksum
479772e66a6SGleb Smirnoff 			 */
480772e66a6SGleb Smirnoff 			otos = ip->ip_tos;
481772e66a6SGleb Smirnoff 			ip->ip_tos |= IPTOS_ECN_CE;
482772e66a6SGleb Smirnoff 			/*
483772e66a6SGleb Smirnoff 			 * update checksum (from RFC1624)
484772e66a6SGleb Smirnoff 			 *	   HC' = ~(~HC + ~m + m')
485772e66a6SGleb Smirnoff 			 */
486772e66a6SGleb Smirnoff 			sum = ~ntohs(ip->ip_sum) & 0xffff;
487772e66a6SGleb Smirnoff 			sum += (~otos & 0xffff) + ip->ip_tos;
488772e66a6SGleb Smirnoff 			sum = (sum >> 16) + (sum & 0xffff);
489772e66a6SGleb Smirnoff 			sum += (sum >> 16);  /* add carry */
490772e66a6SGleb Smirnoff 			ip->ip_sum = htons(~sum & 0xffff);
491772e66a6SGleb Smirnoff 			return (1);
492772e66a6SGleb Smirnoff 		}
493772e66a6SGleb Smirnoff 		break;
494772e66a6SGleb Smirnoff #ifdef INET6
495772e66a6SGleb Smirnoff 	case (IPV6_VERSION >> 4):
496772e66a6SGleb Smirnoff 		if (flags & REDF_ECN6) {
497772e66a6SGleb Smirnoff 			struct ip6_hdr *ip6 = hdr;
498772e66a6SGleb Smirnoff 			u_int32_t flowlabel;
499772e66a6SGleb Smirnoff 
500772e66a6SGleb Smirnoff 			flowlabel = ntohl(ip6->ip6_flow);
501772e66a6SGleb Smirnoff 			if ((flowlabel >> 28) != 6)
502772e66a6SGleb Smirnoff 				return (0);	/* version mismatch! */
503772e66a6SGleb Smirnoff 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
504772e66a6SGleb Smirnoff 			    (IPTOS_ECN_NOTECT << 20))
505772e66a6SGleb Smirnoff 				return (0);	/* not-ECT */
506772e66a6SGleb Smirnoff 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
507772e66a6SGleb Smirnoff 			    (IPTOS_ECN_CE << 20))
508772e66a6SGleb Smirnoff 				return (1);	/* already marked */
509772e66a6SGleb Smirnoff 			/*
510772e66a6SGleb Smirnoff 			 * ecn-capable but not marked,  mark CE
511772e66a6SGleb Smirnoff 			 */
512772e66a6SGleb Smirnoff 			flowlabel |= (IPTOS_ECN_CE << 20);
513772e66a6SGleb Smirnoff 			ip6->ip6_flow = htonl(flowlabel);
514772e66a6SGleb Smirnoff 			return (1);
515772e66a6SGleb Smirnoff 		}
516772e66a6SGleb Smirnoff 		break;
517772e66a6SGleb Smirnoff #endif  /* INET6 */
518772e66a6SGleb Smirnoff 	}
519772e66a6SGleb Smirnoff 
520772e66a6SGleb Smirnoff 	/* not marked */
521772e66a6SGleb Smirnoff 	return (0);
522772e66a6SGleb Smirnoff }
523772e66a6SGleb Smirnoff 
524772e66a6SGleb Smirnoff struct mbuf *
red_getq(red_t * rp,class_queue_t * q)525*c492eb60SMateusz Guzik red_getq(red_t *rp, class_queue_t *q)
526772e66a6SGleb Smirnoff {
527772e66a6SGleb Smirnoff 	struct mbuf *m;
528772e66a6SGleb Smirnoff 
529772e66a6SGleb Smirnoff 	if ((m = _getq(q)) == NULL) {
530772e66a6SGleb Smirnoff 		if (rp->red_idle == 0) {
531772e66a6SGleb Smirnoff 			rp->red_idle = 1;
532772e66a6SGleb Smirnoff 			microtime(&rp->red_last);
533772e66a6SGleb Smirnoff 		}
534772e66a6SGleb Smirnoff 		return NULL;
535772e66a6SGleb Smirnoff 	}
536772e66a6SGleb Smirnoff 
537772e66a6SGleb Smirnoff 	rp->red_idle = 0;
538772e66a6SGleb Smirnoff 	return (m);
539772e66a6SGleb Smirnoff }
540772e66a6SGleb Smirnoff 
541772e66a6SGleb Smirnoff /*
542772e66a6SGleb Smirnoff  * helper routine to calibrate avg during idle.
543772e66a6SGleb Smirnoff  * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
544772e66a6SGleb Smirnoff  * here Wq = 1/weight and the code assumes Wq is close to zero.
545772e66a6SGleb Smirnoff  *
546772e66a6SGleb Smirnoff  * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
547772e66a6SGleb Smirnoff  */
548772e66a6SGleb Smirnoff static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
549772e66a6SGleb Smirnoff 
550772e66a6SGleb Smirnoff struct wtab *
wtab_alloc(int weight)551772e66a6SGleb Smirnoff wtab_alloc(int weight)
552772e66a6SGleb Smirnoff {
553772e66a6SGleb Smirnoff 	struct wtab	*w;
554772e66a6SGleb Smirnoff 	int		 i;
555772e66a6SGleb Smirnoff 
556772e66a6SGleb Smirnoff 	for (w = wtab_list; w != NULL; w = w->w_next)
557772e66a6SGleb Smirnoff 		if (w->w_weight == weight) {
558772e66a6SGleb Smirnoff 			w->w_refcount++;
559772e66a6SGleb Smirnoff 			return (w);
560772e66a6SGleb Smirnoff 		}
561772e66a6SGleb Smirnoff 
562772e66a6SGleb Smirnoff 	w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
563772e66a6SGleb Smirnoff 	if (w == NULL)
564772e66a6SGleb Smirnoff 		return (NULL);
565772e66a6SGleb Smirnoff 	w->w_weight = weight;
566772e66a6SGleb Smirnoff 	w->w_refcount = 1;
567772e66a6SGleb Smirnoff 	w->w_next = wtab_list;
568772e66a6SGleb Smirnoff 	wtab_list = w;
569772e66a6SGleb Smirnoff 
570772e66a6SGleb Smirnoff 	/* initialize the weight table */
571772e66a6SGleb Smirnoff 	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
572772e66a6SGleb Smirnoff 	for (i = 1; i < 32; i++) {
573772e66a6SGleb Smirnoff 		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
574772e66a6SGleb Smirnoff 		if (w->w_tab[i] == 0 && w->w_param_max == 0)
575772e66a6SGleb Smirnoff 			w->w_param_max = 1 << i;
576772e66a6SGleb Smirnoff 	}
577772e66a6SGleb Smirnoff 
578772e66a6SGleb Smirnoff 	return (w);
579772e66a6SGleb Smirnoff }
580772e66a6SGleb Smirnoff 
581772e66a6SGleb Smirnoff int
wtab_destroy(struct wtab * w)582772e66a6SGleb Smirnoff wtab_destroy(struct wtab *w)
583772e66a6SGleb Smirnoff {
584772e66a6SGleb Smirnoff 	struct wtab	*prev;
585772e66a6SGleb Smirnoff 
586772e66a6SGleb Smirnoff 	if (--w->w_refcount > 0)
587772e66a6SGleb Smirnoff 		return (0);
588772e66a6SGleb Smirnoff 
589772e66a6SGleb Smirnoff 	if (wtab_list == w)
590772e66a6SGleb Smirnoff 		wtab_list = w->w_next;
591772e66a6SGleb Smirnoff 	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
592772e66a6SGleb Smirnoff 		if (prev->w_next == w) {
593772e66a6SGleb Smirnoff 			prev->w_next = w->w_next;
594772e66a6SGleb Smirnoff 			break;
595772e66a6SGleb Smirnoff 		}
596772e66a6SGleb Smirnoff 
597772e66a6SGleb Smirnoff 	free(w, M_DEVBUF);
598772e66a6SGleb Smirnoff 	return (0);
599772e66a6SGleb Smirnoff }
600772e66a6SGleb Smirnoff 
601772e66a6SGleb Smirnoff int32_t
pow_w(struct wtab * w,int n)602772e66a6SGleb Smirnoff pow_w(struct wtab *w, int n)
603772e66a6SGleb Smirnoff {
604772e66a6SGleb Smirnoff 	int	i, bit;
605772e66a6SGleb Smirnoff 	int32_t	val;
606772e66a6SGleb Smirnoff 
607772e66a6SGleb Smirnoff 	if (n >= w->w_param_max)
608772e66a6SGleb Smirnoff 		return (0);
609772e66a6SGleb Smirnoff 
610772e66a6SGleb Smirnoff 	val = 1 << FP_SHIFT;
611772e66a6SGleb Smirnoff 	if (n <= 0)
612772e66a6SGleb Smirnoff 		return (val);
613772e66a6SGleb Smirnoff 
614772e66a6SGleb Smirnoff 	bit = 1;
615772e66a6SGleb Smirnoff 	i = 0;
616772e66a6SGleb Smirnoff 	while (n) {
617772e66a6SGleb Smirnoff 		if (n & bit) {
618772e66a6SGleb Smirnoff 			val = (val * w->w_tab[i]) >> FP_SHIFT;
619772e66a6SGleb Smirnoff 			n &= ~bit;
620772e66a6SGleb Smirnoff 		}
621772e66a6SGleb Smirnoff 		i++;
622772e66a6SGleb Smirnoff 		bit <<=  1;
623772e66a6SGleb Smirnoff 	}
624772e66a6SGleb Smirnoff 	return (val);
625772e66a6SGleb Smirnoff }
626772e66a6SGleb Smirnoff 
627772e66a6SGleb Smirnoff #endif /* ALTQ_RED */
628