1 #include <sys/cdefs.h>
2 #include "opt_inet.h"
3 #include "opt_inet6.h"
4 #include "opt_ipsec.h"
5 #include "opt_ratelimit.h"
6 #include "opt_kern_tls.h"
7 #include <sys/param.h>
8 #include <sys/arb.h>
9 #include <sys/module.h>
10 #include <sys/kernel.h>
11 #ifdef TCP_HHOOK
12 #include <sys/hhook.h>
13 #endif
14 #include <sys/lock.h>
15 #include <sys/malloc.h>
16 #include <sys/lock.h>
17 #include <sys/mutex.h>
18 #include <sys/mbuf.h>
19 #include <sys/proc.h> /* for proc0 declaration */
20 #include <sys/socket.h>
21 #include <sys/socketvar.h>
22 #include <sys/sysctl.h>
23 #include <sys/systm.h>
24 #ifdef STATS
25 #include <sys/qmath.h>
26 #include <sys/tree.h>
27 #include <sys/stats.h> /* Must come after qmath.h and tree.h */
28 #else
29 #include <sys/tree.h>
30 #endif
31 #include <sys/refcount.h>
32 #include <sys/queue.h>
33 #include <sys/tim_filter.h>
34 #include <sys/smp.h>
35 #include <sys/kthread.h>
36 #include <sys/kern_prefetch.h>
37 #include <sys/protosw.h>
38 #ifdef TCP_ACCOUNTING
39 #include <sys/sched.h>
40 #include <machine/cpu.h>
41 #endif
42 #include <vm/uma.h>
43
44 #include <net/route.h>
45 #include <net/route/nhop.h>
46 #include <net/vnet.h>
47
48 #define TCPSTATES /* for logging */
49
50 #include <netinet/in.h>
51 #include <netinet/in_kdtrace.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip_var.h>
55 #include <netinet/ip6.h>
56 #include <netinet6/in6_pcb.h>
57 #include <netinet6/ip6_var.h>
58 #include <netinet/tcp.h>
59 #include <netinet/tcp_fsm.h>
60 #include <netinet/tcp_seq.h>
61 #include <netinet/tcp_timer.h>
62 #include <netinet/tcp_var.h>
63 #include <netinet/tcp_log_buf.h>
64 #include <netinet/tcp_syncache.h>
65 #include <netinet/tcp_hpts.h>
66 #include <netinet/tcp_accounting.h>
67 #include <netinet/tcpip.h>
68 #include <netinet/cc/cc.h>
69 #include <netinet/cc/cc_newreno.h>
70 #include <netinet/tcp_fastopen.h>
71 #include <netinet/tcp_lro.h>
72 #ifdef NETFLIX_SHARED_CWND
73 #include <netinet/tcp_shared_cwnd.h>
74 #endif
75 #ifdef TCP_OFFLOAD
76 #include <netinet/tcp_offload.h>
77 #endif
78 #ifdef INET6
79 #include <netinet6/tcp6_var.h>
80 #endif
81 #include <netinet/tcp_ecn.h>
82
83 #include <netipsec/ipsec_support.h>
84
85 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
86 #include <netipsec/ipsec.h>
87 #include <netipsec/ipsec6.h>
88 #endif /* IPSEC */
89
90 #include <netinet/udp.h>
91 #include <netinet/udp_var.h>
92 #include <machine/in_cksum.h>
93
94 #ifdef MAC
95 #include <security/mac/mac_framework.h>
96 #endif
97 #include "sack_filter.h"
98 #include "tcp_rack.h"
99 #include "tailq_hash.h"
100 #include "opt_global.h"
101
102
103 struct rack_sendmap *
tqhash_min(struct tailq_hash * hs)104 tqhash_min(struct tailq_hash *hs)
105 {
106 struct rack_sendmap *rsm;
107
108 rsm = hs->rsm_min;
109 return(rsm);
110 }
111
112 struct rack_sendmap *
tqhash_max(struct tailq_hash * hs)113 tqhash_max(struct tailq_hash *hs)
114 {
115 struct rack_sendmap *rsm;
116
117 rsm = hs->rsm_max;
118 return (rsm);
119 }
120
121 int
tqhash_empty(struct tailq_hash * hs)122 tqhash_empty(struct tailq_hash *hs)
123 {
124 if (hs->count == 0)
125 return(1);
126 return(0);
127 }
128
/*
 * Find the entry whose [r_start, r_end) range contains seq, or
 * NULL when no entry covers it.  Entries hash into buckets by
 * (seq / SEQ_BUCKET_SIZE) % MAX_HASH_ENTRIES, so the common case
 * is a scan of one bucket; an entry that starts in the previous
 * bucket and straddles into this one is located by the
 * look_backwards path.
 */
struct rack_sendmap *
tqhash_find(struct tailq_hash *hs, uint32_t seq)
{
	struct rack_sendmap *e;
	int bindex, pbucket, fc = 1;

	/* Quick reject: outside the tracked [min, max) range. */
	if ((SEQ_LT(seq, hs->min)) ||
	    (hs->count == 0) ||
	    (SEQ_GEQ(seq, hs->max))) {
		/* Not here */
		return (NULL);
	}
	bindex = seq / SEQ_BUCKET_SIZE;
	bindex %= MAX_HASH_ENTRIES;
	/* Lets look through the bucket it belongs to */
	if (TAILQ_EMPTY(&hs->ht[bindex])) {
		goto look_backwards;
	}
	TAILQ_FOREACH(e, &hs->ht[bindex], next) {
		if (fc == 1) {
			/*
			 * Special check for when a cum-ack
			 * as moved up over a seq and now its
			 * a bucket behind where it belongs. In
			 * the case of SACKs which create new rsm's
			 * this won't occur.
			 */
			if (SEQ_GT(e->r_start, seq)) {
				/* First entry already starts past seq. */
				goto look_backwards;
			}
			fc = 0;
		}
		if (SEQ_GEQ(seq, e->r_start) &&
		    (SEQ_LT(seq, e->r_end))) {
			/* Its in this block */
			return (e);
		}
	}
	/* Did not find it */
	return (NULL);
look_backwards:
	/* Scan the prior bucket (wrapping from 0) highest-first. */
	if (bindex == 0)
		pbucket = MAX_HASH_ENTRIES - 1;
	else
		pbucket = bindex - 1;
	TAILQ_FOREACH_REVERSE(e, &hs->ht[pbucket], rack_head, next) {
		if (SEQ_GEQ(seq, e->r_start) &&
		    (SEQ_LT(seq, e->r_end))) {
			/* Its in this block */
			return (e);
		}
		/*
		 * NOTE(review): early-out once an entry ends at or past
		 * seq without containing it — presumably only the tail
		 * entry of the prior bucket can straddle into ours, so
		 * scanning further back is pointless; verify against the
		 * insert/straddle invariants.
		 */
		if (SEQ_GEQ(e->r_end, seq))
			break;
	}
	return (NULL);
}
185
186 struct rack_sendmap *
tqhash_next(struct tailq_hash * hs,struct rack_sendmap * rsm)187 tqhash_next(struct tailq_hash *hs, struct rack_sendmap *rsm)
188 {
189 struct rack_sendmap *e;
190
191 e = TAILQ_NEXT(rsm, next);
192 if (e == NULL) {
193 /* Move to next bucket */
194 int nxt;
195
196 nxt = rsm->bindex + 1;
197 if (nxt >= MAX_HASH_ENTRIES)
198 nxt = 0;
199 e = TAILQ_FIRST(&hs->ht[nxt]);
200 }
201 return(e);
202 }
203
204 struct rack_sendmap *
tqhash_prev(struct tailq_hash * hs,struct rack_sendmap * rsm)205 tqhash_prev(struct tailq_hash *hs, struct rack_sendmap *rsm)
206 {
207 struct rack_sendmap *e;
208
209 e = TAILQ_PREV(rsm, rack_head, next);
210 if (e == NULL) {
211 int prev;
212
213 if (rsm->bindex > 0)
214 prev = rsm->bindex - 1;
215 else
216 prev = MAX_HASH_ENTRIES - 1;
217 e = TAILQ_LAST(&hs->ht[prev], rack_head);
218 }
219 return (e);
220 }
221
/*
 * Unlink rsm from its bucket and repair the cached min/max
 * bookkeeping.  Ownership of rsm stays with the caller; type
 * indicates how the removal was triggered so the tracked range
 * advances correctly.  Note the min/max fix-up must run before
 * TAILQ_REMOVE so tqhash_next()/tqhash_prev() can still walk
 * from rsm.
 */
void
tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type)
{

	hs->count--;
	if (hs->count == 0) {
		/* Last entry: collapse the tracked range to a point. */
		hs->min = hs->max;
		hs->rsm_max = hs->rsm_min = NULL;
	} else if (type == REMOVE_TYPE_CUMACK) {
		/* Cumulative-ack removal always takes the lowest entry. */
		hs->min = rsm->r_end;
		hs->rsm_min = tqhash_next(hs, rsm);
	} else if (rsm == hs->rsm_max) {
		/* Removing the highest entry: step the max back one. */
		hs->rsm_max = tqhash_prev(hs, rsm);
		hs->max = hs->rsm_max->r_end;
	}
	TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next);
}
239
240 int
tqhash_insert(struct tailq_hash * hs,struct rack_sendmap * rsm)241 tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm)
242 {
243 struct rack_sendmap *e, *l;
244 int inserted = 0;
245 uint32_t ebucket;
246
247 #ifdef INVARIANTS
248 if (hs->count > 0) {
249 if ((rsm->r_end - hs->min) > MAX_ALLOWED_SEQ_RANGE) {
250 return (-1);
251 }
252 e = tqhash_find(hs, rsm->r_start);
253 if (e) {
254 return (-2);
255 }
256 }
257 #endif
258 rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE;
259 rsm->bindex %= MAX_HASH_ENTRIES;
260 ebucket = rsm->r_end / SEQ_BUCKET_SIZE;
261 ebucket %= MAX_HASH_ENTRIES;
262 if (ebucket != rsm->bindex) {
263 /* This RSM straddles the bucket boundary */
264 rsm->r_flags |= RACK_STRADDLE;
265 } else {
266 rsm->r_flags &= ~RACK_STRADDLE;
267 }
268 if (hs->count == 0) {
269 /* Special case */
270 hs->min = rsm->r_start;
271 hs->max = rsm->r_end;
272 hs->rsm_min = hs->rsm_max = rsm;
273 hs->count = 1;
274 } else {
275 hs->count++;
276 if (SEQ_GEQ(rsm->r_end, hs->max)) {
277 hs->max = rsm->r_end;
278 hs->rsm_max = rsm;
279 } if (SEQ_LEQ(rsm->r_start, hs->min)) {
280 hs->min = rsm->r_start;
281 hs->rsm_min = rsm;
282 }
283 }
284 /* Check the common case of inserting at the end */
285 l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head);
286 if ((l == NULL) || (SEQ_GT(rsm->r_start, l->r_start))) {
287 TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next);
288 return (0);
289 }
290 TAILQ_FOREACH(e, &hs->ht[rsm->bindex], next) {
291 if (SEQ_LEQ(rsm->r_start, e->r_start)) {
292 inserted = 1;
293 TAILQ_INSERT_BEFORE(e, rsm, next);
294 break;
295 }
296 }
297 if (inserted == 0) {
298 TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next);
299 }
300 return (0);
301 }
302
303 void
tqhash_init(struct tailq_hash * hs)304 tqhash_init(struct tailq_hash *hs)
305 {
306 int i;
307
308 for(i = 0; i < MAX_HASH_ENTRIES; i++) {
309 TAILQ_INIT(&hs->ht[i]);
310 }
311 hs->min = hs->max = 0;
312 hs->rsm_min = hs->rsm_max = NULL;
313 hs->count = 0;
314 }
315
316 int
tqhash_trim(struct tailq_hash * hs,uint32_t th_ack)317 tqhash_trim(struct tailq_hash *hs, uint32_t th_ack)
318 {
319 struct rack_sendmap *rsm;
320
321 if (SEQ_LT(th_ack, hs->min)) {
322 /* It can't be behind our current min */
323 return (-1);
324 }
325 if (SEQ_GEQ(th_ack, hs->max)) {
326 /* It can't be beyond or at our current max */
327 return (-2);
328 }
329 rsm = tqhash_min(hs);
330 if (rsm == NULL) {
331 /* nothing to trim */
332 return (-3);
333 }
334 if (SEQ_GEQ(th_ack, rsm->r_end)) {
335 /*
336 * You can't trim all bytes instead
337 * you need to remove it.
338 */
339 return (-4);
340 }
341 if (SEQ_GT(th_ack, hs->min))
342 hs->min = th_ack;
343 /*
344 * Should we trim it for the caller?
345 * they may have already which is ok...
346 */
347 if (SEQ_GT(th_ack, rsm->r_start)) {
348 rsm->r_start = th_ack;
349 }
350 return (0);
351 }
352
353 void
tqhash_update_end(struct tailq_hash * hs,struct rack_sendmap * rsm,uint32_t th_ack)354 tqhash_update_end(struct tailq_hash *hs, struct rack_sendmap *rsm,
355 uint32_t th_ack)
356 {
357 if (hs->max == rsm->r_end)
358 hs->max = th_ack;
359 rsm->r_end = th_ack;
360 }
361