xref: /freebsd/sys/netinet/tcp_stacks/tailq_hash.c (revision e18b97bd63a8112625f7014d2326ecf533b710dd)
1030434acSRandall Stewart #include <sys/cdefs.h>
2030434acSRandall Stewart #include "opt_inet.h"
3030434acSRandall Stewart #include "opt_inet6.h"
4030434acSRandall Stewart #include "opt_ipsec.h"
5030434acSRandall Stewart #include "opt_ratelimit.h"
6030434acSRandall Stewart #include "opt_kern_tls.h"
7030434acSRandall Stewart #include <sys/param.h>
8030434acSRandall Stewart #include <sys/arb.h>
9030434acSRandall Stewart #include <sys/module.h>
10030434acSRandall Stewart #include <sys/kernel.h>
11030434acSRandall Stewart #ifdef TCP_HHOOK
12030434acSRandall Stewart #include <sys/hhook.h>
13030434acSRandall Stewart #endif
14030434acSRandall Stewart #include <sys/lock.h>
15030434acSRandall Stewart #include <sys/malloc.h>
16030434acSRandall Stewart #include <sys/lock.h>
17030434acSRandall Stewart #include <sys/mutex.h>
18030434acSRandall Stewart #include <sys/mbuf.h>
19030434acSRandall Stewart #include <sys/proc.h>		/* for proc0 declaration */
20030434acSRandall Stewart #include <sys/socket.h>
21030434acSRandall Stewart #include <sys/socketvar.h>
22030434acSRandall Stewart #include <sys/sysctl.h>
23030434acSRandall Stewart #include <sys/systm.h>
24030434acSRandall Stewart #ifdef STATS
25030434acSRandall Stewart #include <sys/qmath.h>
26030434acSRandall Stewart #include <sys/tree.h>
27030434acSRandall Stewart #include <sys/stats.h> /* Must come after qmath.h and tree.h */
28030434acSRandall Stewart #else
29030434acSRandall Stewart #include <sys/tree.h>
30030434acSRandall Stewart #endif
31030434acSRandall Stewart #include <sys/refcount.h>
32030434acSRandall Stewart #include <sys/queue.h>
33030434acSRandall Stewart #include <sys/tim_filter.h>
34030434acSRandall Stewart #include <sys/smp.h>
35030434acSRandall Stewart #include <sys/kthread.h>
36030434acSRandall Stewart #include <sys/kern_prefetch.h>
37030434acSRandall Stewart #include <sys/protosw.h>
38030434acSRandall Stewart #ifdef TCP_ACCOUNTING
39030434acSRandall Stewart #include <sys/sched.h>
40030434acSRandall Stewart #include <machine/cpu.h>
41030434acSRandall Stewart #endif
42030434acSRandall Stewart #include <vm/uma.h>
43030434acSRandall Stewart 
44030434acSRandall Stewart #include <net/route.h>
45030434acSRandall Stewart #include <net/route/nhop.h>
46030434acSRandall Stewart #include <net/vnet.h>
47030434acSRandall Stewart 
48030434acSRandall Stewart #define TCPSTATES		/* for logging */
49030434acSRandall Stewart 
50030434acSRandall Stewart #include <netinet/in.h>
51030434acSRandall Stewart #include <netinet/in_kdtrace.h>
52030434acSRandall Stewart #include <netinet/in_pcb.h>
53030434acSRandall Stewart #include <netinet/ip.h>
54030434acSRandall Stewart #include <netinet/ip_icmp.h>	/* required for icmp_var.h */
55030434acSRandall Stewart #include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
56030434acSRandall Stewart #include <netinet/ip_var.h>
57030434acSRandall Stewart #include <netinet/ip6.h>
58030434acSRandall Stewart #include <netinet6/in6_pcb.h>
59030434acSRandall Stewart #include <netinet6/ip6_var.h>
60030434acSRandall Stewart #include <netinet/tcp.h>
61030434acSRandall Stewart #include <netinet/tcp_fsm.h>
62030434acSRandall Stewart #include <netinet/tcp_seq.h>
63030434acSRandall Stewart #include <netinet/tcp_timer.h>
64030434acSRandall Stewart #include <netinet/tcp_var.h>
65030434acSRandall Stewart #include <netinet/tcp_log_buf.h>
66030434acSRandall Stewart #include <netinet/tcp_syncache.h>
67030434acSRandall Stewart #include <netinet/tcp_hpts.h>
68030434acSRandall Stewart #include <netinet/tcp_accounting.h>
69030434acSRandall Stewart #include <netinet/tcpip.h>
70030434acSRandall Stewart #include <netinet/cc/cc.h>
71030434acSRandall Stewart #include <netinet/cc/cc_newreno.h>
72030434acSRandall Stewart #include <netinet/tcp_fastopen.h>
73030434acSRandall Stewart #include <netinet/tcp_lro.h>
74030434acSRandall Stewart #ifdef NETFLIX_SHARED_CWND
75030434acSRandall Stewart #include <netinet/tcp_shared_cwnd.h>
76030434acSRandall Stewart #endif
77030434acSRandall Stewart #ifdef TCP_OFFLOAD
78030434acSRandall Stewart #include <netinet/tcp_offload.h>
79030434acSRandall Stewart #endif
80030434acSRandall Stewart #ifdef INET6
81030434acSRandall Stewart #include <netinet6/tcp6_var.h>
82030434acSRandall Stewart #endif
83030434acSRandall Stewart #include <netinet/tcp_ecn.h>
84030434acSRandall Stewart 
85030434acSRandall Stewart #include <netipsec/ipsec_support.h>
86030434acSRandall Stewart 
87030434acSRandall Stewart #if defined(IPSEC) || defined(IPSEC_SUPPORT)
88030434acSRandall Stewart #include <netipsec/ipsec.h>
89030434acSRandall Stewart #include <netipsec/ipsec6.h>
90030434acSRandall Stewart #endif				/* IPSEC */
91030434acSRandall Stewart 
92030434acSRandall Stewart #include <netinet/udp.h>
93030434acSRandall Stewart #include <netinet/udp_var.h>
94030434acSRandall Stewart #include <machine/in_cksum.h>
95030434acSRandall Stewart 
96030434acSRandall Stewart #ifdef MAC
97030434acSRandall Stewart #include <security/mac/mac_framework.h>
98030434acSRandall Stewart #endif
99030434acSRandall Stewart #include "sack_filter.h"
100030434acSRandall Stewart #include "tcp_rack.h"
101030434acSRandall Stewart #include "tailq_hash.h"
102*e18b97bdSRandall Stewart #include "opt_global.h"
103030434acSRandall Stewart 
104030434acSRandall Stewart 
105030434acSRandall Stewart struct rack_sendmap *
tqhash_min(struct tailq_hash * hs)106030434acSRandall Stewart tqhash_min(struct tailq_hash *hs)
107030434acSRandall Stewart {
108030434acSRandall Stewart 	struct rack_sendmap *rsm;
109030434acSRandall Stewart 
110*e18b97bdSRandall Stewart 	rsm = hs->rsm_min;
111030434acSRandall Stewart 	return(rsm);
112030434acSRandall Stewart }
113030434acSRandall Stewart 
114030434acSRandall Stewart struct rack_sendmap *
tqhash_max(struct tailq_hash * hs)115030434acSRandall Stewart tqhash_max(struct tailq_hash *hs)
116030434acSRandall Stewart {
117030434acSRandall Stewart 	struct rack_sendmap *rsm;
118030434acSRandall Stewart 
119*e18b97bdSRandall Stewart 	rsm = hs->rsm_max;
120030434acSRandall Stewart 	return (rsm);
121030434acSRandall Stewart }
122030434acSRandall Stewart 
123030434acSRandall Stewart int
tqhash_empty(struct tailq_hash * hs)124030434acSRandall Stewart tqhash_empty(struct tailq_hash *hs)
125030434acSRandall Stewart {
126030434acSRandall Stewart 	if (hs->count == 0)
127030434acSRandall Stewart 		return(1);
128030434acSRandall Stewart 	return(0);
129030434acSRandall Stewart }
130030434acSRandall Stewart 
131030434acSRandall Stewart struct rack_sendmap *
tqhash_find(struct tailq_hash * hs,uint32_t seq)132030434acSRandall Stewart tqhash_find(struct tailq_hash *hs, uint32_t seq)
133030434acSRandall Stewart {
134030434acSRandall Stewart 	struct rack_sendmap *e;
135030434acSRandall Stewart 	int bindex, pbucket, fc = 1;
136030434acSRandall Stewart 
137030434acSRandall Stewart 	if ((SEQ_LT(seq, hs->min)) ||
138030434acSRandall Stewart 	    (hs->count == 0) ||
139030434acSRandall Stewart 	    (SEQ_GEQ(seq, hs->max))) {
140030434acSRandall Stewart 		/* Not here */
141030434acSRandall Stewart 		return (NULL);
142030434acSRandall Stewart 	}
143030434acSRandall Stewart 	bindex = seq / SEQ_BUCKET_SIZE;
144030434acSRandall Stewart 	bindex %= MAX_HASH_ENTRIES;
145030434acSRandall Stewart 	/* Lets look through the bucket it belongs to */
146030434acSRandall Stewart 	if (TAILQ_EMPTY(&hs->ht[bindex])) {
147030434acSRandall Stewart 		goto look_backwards;
148030434acSRandall Stewart 	}
149030434acSRandall Stewart 	TAILQ_FOREACH(e, &hs->ht[bindex], next) {
150030434acSRandall Stewart 		if (fc == 1) {
151030434acSRandall Stewart 			/*
152030434acSRandall Stewart 			 * Special check for when a cum-ack
153030434acSRandall Stewart 			 * as moved up over a seq and now its
154030434acSRandall Stewart 			 * a bucket behind where it belongs. In
155030434acSRandall Stewart 			 * the case of SACKs which create new rsm's
156030434acSRandall Stewart 			 * this won't occur.
157030434acSRandall Stewart 			 */
158030434acSRandall Stewart 			if (SEQ_GT(e->r_start, seq)) {
159030434acSRandall Stewart 				goto look_backwards;
160030434acSRandall Stewart 			}
161030434acSRandall Stewart 			fc = 0;
162030434acSRandall Stewart 		}
163030434acSRandall Stewart 		if (SEQ_GEQ(seq, e->r_start) &&
164030434acSRandall Stewart 		    (SEQ_LT(seq, e->r_end))) {
165030434acSRandall Stewart 			/* Its in this block */
166030434acSRandall Stewart 			return (e);
167030434acSRandall Stewart 		}
168030434acSRandall Stewart 	}
169030434acSRandall Stewart 	/* Did not find it */
170030434acSRandall Stewart 	return (NULL);
171030434acSRandall Stewart look_backwards:
172030434acSRandall Stewart 	if (bindex == 0)
173030434acSRandall Stewart 		pbucket = MAX_HASH_ENTRIES - 1;
174030434acSRandall Stewart 	else
175030434acSRandall Stewart 		pbucket = bindex - 1;
176030434acSRandall Stewart 	TAILQ_FOREACH_REVERSE(e, &hs->ht[pbucket], rack_head, next) {
177030434acSRandall Stewart 		if (SEQ_GEQ(seq, e->r_start) &&
178030434acSRandall Stewart 		    (SEQ_LT(seq, e->r_end))) {
179030434acSRandall Stewart 			/* Its in this block */
180030434acSRandall Stewart 			return (e);
181030434acSRandall Stewart 		}
182030434acSRandall Stewart 		if (SEQ_GEQ(e->r_end, seq))
183030434acSRandall Stewart 			break;
184030434acSRandall Stewart 	}
185030434acSRandall Stewart 	return (NULL);
186030434acSRandall Stewart }
187030434acSRandall Stewart 
188030434acSRandall Stewart struct rack_sendmap *
tqhash_next(struct tailq_hash * hs,struct rack_sendmap * rsm)189030434acSRandall Stewart tqhash_next(struct tailq_hash *hs, struct rack_sendmap *rsm)
190030434acSRandall Stewart {
191030434acSRandall Stewart 	struct rack_sendmap *e;
192030434acSRandall Stewart 
193030434acSRandall Stewart 	e = TAILQ_NEXT(rsm, next);
194030434acSRandall Stewart 	if (e == NULL) {
195030434acSRandall Stewart 		/* Move to next bucket */
196030434acSRandall Stewart 		int nxt;
197030434acSRandall Stewart 
198030434acSRandall Stewart 		nxt = rsm->bindex + 1;
199030434acSRandall Stewart 		if (nxt >= MAX_HASH_ENTRIES)
200030434acSRandall Stewart 			nxt = 0;
201030434acSRandall Stewart 		e = TAILQ_FIRST(&hs->ht[nxt]);
202030434acSRandall Stewart 	}
203030434acSRandall Stewart 	return(e);
204030434acSRandall Stewart }
205030434acSRandall Stewart 
206030434acSRandall Stewart struct rack_sendmap *
tqhash_prev(struct tailq_hash * hs,struct rack_sendmap * rsm)207030434acSRandall Stewart tqhash_prev(struct tailq_hash *hs, struct rack_sendmap *rsm)
208030434acSRandall Stewart {
209030434acSRandall Stewart 	struct rack_sendmap *e;
210030434acSRandall Stewart 
211030434acSRandall Stewart 	e = TAILQ_PREV(rsm, rack_head, next);
212030434acSRandall Stewart 	if (e == NULL) {
213030434acSRandall Stewart 		int prev;
214030434acSRandall Stewart 
215030434acSRandall Stewart 		if (rsm->bindex > 0)
216030434acSRandall Stewart 			prev = rsm->bindex - 1;
217030434acSRandall Stewart 		else
218030434acSRandall Stewart 			prev = MAX_HASH_ENTRIES - 1;
219030434acSRandall Stewart 		e = TAILQ_LAST(&hs->ht[prev], rack_head);
220030434acSRandall Stewart 	}
221030434acSRandall Stewart 	return (e);
222030434acSRandall Stewart }
223030434acSRandall Stewart 
224030434acSRandall Stewart void
tqhash_remove(struct tailq_hash * hs,struct rack_sendmap * rsm,int type)225030434acSRandall Stewart tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type)
226030434acSRandall Stewart {
227*e18b97bdSRandall Stewart 
228030434acSRandall Stewart 	hs->count--;
229030434acSRandall Stewart 	if (hs->count == 0) {
230030434acSRandall Stewart 		hs->min = hs->max;
231*e18b97bdSRandall Stewart 		hs->rsm_max = hs->rsm_min = NULL;
232030434acSRandall Stewart 	} else if (type == REMOVE_TYPE_CUMACK) {
233030434acSRandall Stewart 		hs->min = rsm->r_end;
234*e18b97bdSRandall Stewart 		hs->rsm_min = tqhash_next(hs, rsm);
235*e18b97bdSRandall Stewart 	} else if (rsm == hs->rsm_max) {
236*e18b97bdSRandall Stewart 		hs->rsm_max = tqhash_prev(hs, rsm);
237*e18b97bdSRandall Stewart 		hs->max = hs->rsm_max->r_end;
238030434acSRandall Stewart 	}
239*e18b97bdSRandall Stewart 	TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next);
240030434acSRandall Stewart }
241030434acSRandall Stewart 
242030434acSRandall Stewart int
tqhash_insert(struct tailq_hash * hs,struct rack_sendmap * rsm)243030434acSRandall Stewart tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm)
244030434acSRandall Stewart {
245030434acSRandall Stewart 	struct rack_sendmap *e, *l;
246030434acSRandall Stewart 	int inserted = 0;
247030434acSRandall Stewart 	uint32_t ebucket;
248030434acSRandall Stewart 
249*e18b97bdSRandall Stewart #ifdef INVARIANTS
250030434acSRandall Stewart 	if (hs->count > 0) {
251030434acSRandall Stewart 		if ((rsm->r_end - hs->min) >  MAX_ALLOWED_SEQ_RANGE) {
252030434acSRandall Stewart 			return (-1);
253030434acSRandall Stewart 		}
254030434acSRandall Stewart 		e = tqhash_find(hs, rsm->r_start);
255030434acSRandall Stewart 		if (e) {
256030434acSRandall Stewart 			return (-2);
257030434acSRandall Stewart 		}
258030434acSRandall Stewart 	}
259*e18b97bdSRandall Stewart #endif
260030434acSRandall Stewart 	rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE;
261030434acSRandall Stewart 	rsm->bindex %= MAX_HASH_ENTRIES;
262030434acSRandall Stewart 	ebucket = rsm->r_end / SEQ_BUCKET_SIZE;
263030434acSRandall Stewart 	ebucket %= MAX_HASH_ENTRIES;
264030434acSRandall Stewart 	if (ebucket != rsm->bindex) {
265030434acSRandall Stewart 		/* This RSM straddles the bucket boundary */
266030434acSRandall Stewart 		rsm->r_flags |= RACK_STRADDLE;
267030434acSRandall Stewart 	} else {
268030434acSRandall Stewart 		rsm->r_flags &= ~RACK_STRADDLE;
269030434acSRandall Stewart 	}
270030434acSRandall Stewart 	if (hs->count == 0) {
271030434acSRandall Stewart 		/* Special case */
272030434acSRandall Stewart 		hs->min = rsm->r_start;
273030434acSRandall Stewart 		hs->max = rsm->r_end;
274*e18b97bdSRandall Stewart 		hs->rsm_min = hs->rsm_max = rsm;
275030434acSRandall Stewart 		hs->count = 1;
276030434acSRandall Stewart 	} else {
277030434acSRandall Stewart 		hs->count++;
278*e18b97bdSRandall Stewart 		if (SEQ_GEQ(rsm->r_end, hs->max)) {
279030434acSRandall Stewart 			hs->max = rsm->r_end;
280*e18b97bdSRandall Stewart 			hs->rsm_max = rsm;
281*e18b97bdSRandall Stewart 		} if (SEQ_LEQ(rsm->r_start, hs->min)) {
282030434acSRandall Stewart 			hs->min = rsm->r_start;
283*e18b97bdSRandall Stewart 			hs->rsm_min = rsm;
284*e18b97bdSRandall Stewart 		}
285030434acSRandall Stewart 	}
286030434acSRandall Stewart 	/* Check the common case of inserting at the end */
287030434acSRandall Stewart 	l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head);
288030434acSRandall Stewart 	if ((l == NULL) || (SEQ_GT(rsm->r_start, l->r_start))) {
289030434acSRandall Stewart 		TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next);
290030434acSRandall Stewart 		return (0);
291030434acSRandall Stewart 	}
292030434acSRandall Stewart 	TAILQ_FOREACH(e, &hs->ht[rsm->bindex], next) {
293030434acSRandall Stewart 		if (SEQ_LEQ(rsm->r_start, e->r_start)) {
294030434acSRandall Stewart 			inserted = 1;
295030434acSRandall Stewart 			TAILQ_INSERT_BEFORE(e, rsm, next);
296030434acSRandall Stewart 			break;
297030434acSRandall Stewart 		}
298030434acSRandall Stewart 	}
299030434acSRandall Stewart 	if (inserted == 0) {
300030434acSRandall Stewart 		TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next);
301030434acSRandall Stewart 	}
302030434acSRandall Stewart 	return (0);
303030434acSRandall Stewart }
304030434acSRandall Stewart 
305030434acSRandall Stewart void
tqhash_init(struct tailq_hash * hs)306030434acSRandall Stewart tqhash_init(struct tailq_hash *hs)
307030434acSRandall Stewart {
308030434acSRandall Stewart 	int i;
309030434acSRandall Stewart 
310030434acSRandall Stewart 	for(i = 0; i < MAX_HASH_ENTRIES; i++) {
311030434acSRandall Stewart 		TAILQ_INIT(&hs->ht[i]);
312030434acSRandall Stewart 	}
313030434acSRandall Stewart 	hs->min = hs->max = 0;
314*e18b97bdSRandall Stewart 	hs->rsm_min = hs->rsm_max = NULL;
315030434acSRandall Stewart 	hs->count = 0;
316030434acSRandall Stewart }
317030434acSRandall Stewart 
318030434acSRandall Stewart int
tqhash_trim(struct tailq_hash * hs,uint32_t th_ack)319030434acSRandall Stewart tqhash_trim(struct tailq_hash *hs, uint32_t th_ack)
320030434acSRandall Stewart {
321030434acSRandall Stewart 	struct rack_sendmap *rsm;
322030434acSRandall Stewart 
323030434acSRandall Stewart 	if (SEQ_LT(th_ack, hs->min)) {
324030434acSRandall Stewart 		/* It can't be behind our current min */
325030434acSRandall Stewart 		return (-1);
326030434acSRandall Stewart 	}
327030434acSRandall Stewart 	if (SEQ_GEQ(th_ack, hs->max)) {
328030434acSRandall Stewart 		/*  It can't be beyond or at our current max */
329030434acSRandall Stewart 		return (-2);
330030434acSRandall Stewart 	}
331030434acSRandall Stewart 	rsm = tqhash_min(hs);
332030434acSRandall Stewart 	if (rsm == NULL) {
333030434acSRandall Stewart 		/* nothing to trim */
334030434acSRandall Stewart 		return (-3);
335030434acSRandall Stewart 	}
336030434acSRandall Stewart 	if (SEQ_GEQ(th_ack, rsm->r_end)) {
337030434acSRandall Stewart 		/*
338030434acSRandall Stewart 		 * You can't trim all bytes instead
339030434acSRandall Stewart 		 * you need to remove it.
340030434acSRandall Stewart 		 */
341030434acSRandall Stewart 		return (-4);
342030434acSRandall Stewart 	}
343030434acSRandall Stewart 	if (SEQ_GT(th_ack, hs->min))
344030434acSRandall Stewart 	    hs->min = th_ack;
345030434acSRandall Stewart 	/*
346030434acSRandall Stewart 	 * Should we trim it for the caller?
347030434acSRandall Stewart 	 * they may have already which is ok...
348030434acSRandall Stewart 	 */
349030434acSRandall Stewart 	if (SEQ_GT(th_ack, rsm->r_start)) {
350030434acSRandall Stewart 		rsm->r_start = th_ack;
351030434acSRandall Stewart 	}
352030434acSRandall Stewart 	return (0);
353030434acSRandall Stewart }
354030434acSRandall Stewart 
355*e18b97bdSRandall Stewart void
tqhash_update_end(struct tailq_hash * hs,struct rack_sendmap * rsm,uint32_t th_ack)356*e18b97bdSRandall Stewart tqhash_update_end(struct tailq_hash *hs, struct rack_sendmap *rsm,
357*e18b97bdSRandall Stewart     uint32_t th_ack)
358*e18b97bdSRandall Stewart {
359*e18b97bdSRandall Stewart 	if (hs->max == rsm->r_end)
360*e18b97bdSRandall Stewart 		hs->max = th_ack;
361*e18b97bdSRandall Stewart 	rsm->r_end = th_ack;
362*e18b97bdSRandall Stewart }
363