1 #include <sys/cdefs.h> 2 #include "opt_inet.h" 3 #include "opt_inet6.h" 4 #include "opt_ipsec.h" 5 #include "opt_ratelimit.h" 6 #include "opt_kern_tls.h" 7 #include <sys/param.h> 8 #include <sys/arb.h> 9 #include <sys/module.h> 10 #include <sys/kernel.h> 11 #ifdef TCP_HHOOK 12 #include <sys/hhook.h> 13 #endif 14 #include <sys/lock.h> 15 #include <sys/malloc.h> 16 #include <sys/lock.h> 17 #include <sys/mutex.h> 18 #include <sys/mbuf.h> 19 #include <sys/proc.h> /* for proc0 declaration */ 20 #include <sys/socket.h> 21 #include <sys/socketvar.h> 22 #include <sys/sysctl.h> 23 #include <sys/systm.h> 24 #ifdef STATS 25 #include <sys/qmath.h> 26 #include <sys/tree.h> 27 #include <sys/stats.h> /* Must come after qmath.h and tree.h */ 28 #else 29 #include <sys/tree.h> 30 #endif 31 #include <sys/refcount.h> 32 #include <sys/queue.h> 33 #include <sys/tim_filter.h> 34 #include <sys/smp.h> 35 #include <sys/kthread.h> 36 #include <sys/kern_prefetch.h> 37 #include <sys/protosw.h> 38 #ifdef TCP_ACCOUNTING 39 #include <sys/sched.h> 40 #include <machine/cpu.h> 41 #endif 42 #include <vm/uma.h> 43 44 #include <net/route.h> 45 #include <net/route/nhop.h> 46 #include <net/vnet.h> 47 48 #define TCPSTATES /* for logging */ 49 50 #include <netinet/in.h> 51 #include <netinet/in_kdtrace.h> 52 #include <netinet/in_pcb.h> 53 #include <netinet/ip.h> 54 #include <netinet/ip_icmp.h> /* required for icmp_var.h */ 55 #include <netinet/icmp_var.h> /* for ICMP_BANDLIM */ 56 #include <netinet/ip_var.h> 57 #include <netinet/ip6.h> 58 #include <netinet6/in6_pcb.h> 59 #include <netinet6/ip6_var.h> 60 #include <netinet/tcp.h> 61 #include <netinet/tcp_fsm.h> 62 #include <netinet/tcp_seq.h> 63 #include <netinet/tcp_timer.h> 64 #include <netinet/tcp_var.h> 65 #include <netinet/tcp_log_buf.h> 66 #include <netinet/tcp_syncache.h> 67 #include <netinet/tcp_hpts.h> 68 #include <netinet/tcp_accounting.h> 69 #include <netinet/tcpip.h> 70 #include <netinet/cc/cc.h> 71 #include <netinet/cc/cc_newreno.h> 72 #include <netinet/tcp_fastopen.h> 73 #include <netinet/tcp_lro.h> 74 #ifdef NETFLIX_SHARED_CWND 75 #include <netinet/tcp_shared_cwnd.h> 76 #endif 77 #ifdef TCP_OFFLOAD 78 #include <netinet/tcp_offload.h> 79 #endif 80 #ifdef INET6 81 #include <netinet6/tcp6_var.h> 82 #endif 83 #include <netinet/tcp_ecn.h> 84 85 #include <netipsec/ipsec_support.h> 86 87 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 88 #include <netipsec/ipsec.h> 89 #include <netipsec/ipsec6.h> 90 #endif /* IPSEC */ 91 92 #include <netinet/udp.h> 93 #include <netinet/udp_var.h> 94 #include <machine/in_cksum.h> 95 96 #ifdef MAC 97 #include <security/mac/mac_framework.h> 98 #endif 99 #include "sack_filter.h" 100 #include "tcp_rack.h" 101 #include "tailq_hash.h" 102 #include "opt_global.h" 103 104 105 struct rack_sendmap * 106 tqhash_min(struct tailq_hash *hs) 107 { 108 struct rack_sendmap *rsm; 109 110 rsm = hs->rsm_min; 111 return(rsm); 112 } 113 114 struct rack_sendmap * 115 tqhash_max(struct tailq_hash *hs) 116 { 117 struct rack_sendmap *rsm; 118 119 rsm = hs->rsm_max; 120 return (rsm); 121 } 122 123 int 124 tqhash_empty(struct tailq_hash *hs) 125 { 126 if (hs->count == 0) 127 return(1); 128 return(0); 129 } 130 131 struct rack_sendmap * 132 tqhash_find(struct tailq_hash *hs, uint32_t seq) 133 { 134 struct rack_sendmap *e; 135 int bindex, pbucket, fc = 1; 136 137 if ((SEQ_LT(seq, hs->min)) || 138 (hs->count == 0) || 139 (SEQ_GEQ(seq, hs->max))) { 140 /* Not here */ 141 return (NULL); 142 } 143 bindex = seq / SEQ_BUCKET_SIZE; 144 bindex %= MAX_HASH_ENTRIES; 145 /* Lets look through the bucket it belongs to */ 146 if (TAILQ_EMPTY(&hs->ht[bindex])) { 147 goto look_backwards; 148 } 149 TAILQ_FOREACH(e, &hs->ht[bindex], next) { 150 if (fc == 1) { 151 /* 152 * Special check for when a cum-ack 153 * as moved up over a seq and now its 154 * a bucket behind where it belongs. In 155 * the case of SACKs which create new rsm's 156 * this won't occur. 157 */ 158 if (SEQ_GT(e->r_start, seq)) { 159 goto look_backwards; 160 } 161 fc = 0; 162 } 163 if (SEQ_GEQ(seq, e->r_start) && 164 (SEQ_LT(seq, e->r_end))) { 165 /* Its in this block */ 166 return (e); 167 } 168 } 169 /* Did not find it */ 170 return (NULL); 171 look_backwards: 172 if (bindex == 0) 173 pbucket = MAX_HASH_ENTRIES - 1; 174 else 175 pbucket = bindex - 1; 176 TAILQ_FOREACH_REVERSE(e, &hs->ht[pbucket], rack_head, next) { 177 if (SEQ_GEQ(seq, e->r_start) && 178 (SEQ_LT(seq, e->r_end))) { 179 /* Its in this block */ 180 return (e); 181 } 182 if (SEQ_GEQ(e->r_end, seq)) 183 break; 184 } 185 return (NULL); 186 } 187 188 struct rack_sendmap * 189 tqhash_next(struct tailq_hash *hs, struct rack_sendmap *rsm) 190 { 191 struct rack_sendmap *e; 192 193 e = TAILQ_NEXT(rsm, next); 194 if (e == NULL) { 195 /* Move to next bucket */ 196 int nxt; 197 198 nxt = rsm->bindex + 1; 199 if (nxt >= MAX_HASH_ENTRIES) 200 nxt = 0; 201 e = TAILQ_FIRST(&hs->ht[nxt]); 202 } 203 return(e); 204 } 205 206 struct rack_sendmap * 207 tqhash_prev(struct tailq_hash *hs, struct rack_sendmap *rsm) 208 { 209 struct rack_sendmap *e; 210 211 e = TAILQ_PREV(rsm, rack_head, next); 212 if (e == NULL) { 213 int prev; 214 215 if (rsm->bindex > 0) 216 prev = rsm->bindex - 1; 217 else 218 prev = MAX_HASH_ENTRIES - 1; 219 e = TAILQ_LAST(&hs->ht[prev], rack_head); 220 } 221 return (e); 222 } 223 224 void 225 tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type) 226 { 227 228 hs->count--; 229 if (hs->count == 0) { 230 hs->min = hs->max; 231 hs->rsm_max = hs->rsm_min = NULL; 232 } else if (type == REMOVE_TYPE_CUMACK) { 233 hs->min = rsm->r_end; 234 hs->rsm_min = tqhash_next(hs, rsm); 235 } else if (rsm == hs->rsm_max) { 236 hs->rsm_max = tqhash_prev(hs, rsm); 237 hs->max = hs->rsm_max->r_end; 238 } 239 TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next); 240 } 241 242 int 243 tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm) 244 { 245 struct rack_sendmap *e, *l; 246 int inserted = 0; 247 uint32_t ebucket; 248 249 #ifdef INVARIANTS 250 if (hs->count > 0) { 251 if ((rsm->r_end - hs->min) > MAX_ALLOWED_SEQ_RANGE) { 252 return (-1); 253 } 254 e = tqhash_find(hs, rsm->r_start); 255 if (e) { 256 return (-2); 257 } 258 } 259 #endif 260 rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE; 261 rsm->bindex %= MAX_HASH_ENTRIES; 262 ebucket = rsm->r_end / SEQ_BUCKET_SIZE; 263 ebucket %= MAX_HASH_ENTRIES; 264 if (ebucket != rsm->bindex) { 265 /* This RSM straddles the bucket boundary */ 266 rsm->r_flags |= RACK_STRADDLE; 267 } else { 268 rsm->r_flags &= ~RACK_STRADDLE; 269 } 270 if (hs->count == 0) { 271 /* Special case */ 272 hs->min = rsm->r_start; 273 hs->max = rsm->r_end; 274 hs->rsm_min = hs->rsm_max = rsm; 275 hs->count = 1; 276 } else { 277 hs->count++; 278 if (SEQ_GEQ(rsm->r_end, hs->max)) { 279 hs->max = rsm->r_end; 280 hs->rsm_max = rsm; 281 } if (SEQ_LEQ(rsm->r_start, hs->min)) { 282 hs->min = rsm->r_start; 283 hs->rsm_min = rsm; 284 } 285 } 286 /* Check the common case of inserting at the end */ 287 l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head); 288 if ((l == NULL) || (SEQ_GT(rsm->r_start, l->r_start))) { 289 TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next); 290 return (0); 291 } 292 TAILQ_FOREACH(e, &hs->ht[rsm->bindex], next) { 293 if (SEQ_LEQ(rsm->r_start, e->r_start)) { 294 inserted = 1; 295 TAILQ_INSERT_BEFORE(e, rsm, next); 296 break; 297 } 298 } 299 if (inserted == 0) { 300 TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next); 301 } 302 return (0); 303 } 304 305 void 306 tqhash_init(struct tailq_hash *hs) 307 { 308 int i; 309 310 for(i = 0; i < MAX_HASH_ENTRIES; i++) { 311 TAILQ_INIT(&hs->ht[i]); 312 } 313 hs->min = hs->max = 0; 314 hs->rsm_min = hs->rsm_max = NULL; 315 hs->count = 0; 316 } 317 318 int 319 tqhash_trim(struct tailq_hash *hs, uint32_t th_ack) 320 { 321 struct rack_sendmap *rsm; 322 323 if (SEQ_LT(th_ack, hs->min)) { 324 /* It can't be behind our current min */ 325 return (-1); 326 } 327 if (SEQ_GEQ(th_ack, hs->max)) { 328 /* It can't be beyond or at our current max */ 329 return (-2); 330 } 331 rsm = tqhash_min(hs); 332 if (rsm == NULL) { 333 /* nothing to trim */ 334 return (-3); 335 } 336 if (SEQ_GEQ(th_ack, rsm->r_end)) { 337 /* 338 * You can't trim all bytes instead 339 * you need to remove it. 340 */ 341 return (-4); 342 } 343 if (SEQ_GT(th_ack, hs->min)) 344 hs->min = th_ack; 345 /* 346 * Should we trim it for the caller? 347 * they may have already which is ok... 348 */ 349 if (SEQ_GT(th_ack, rsm->r_start)) { 350 rsm->r_start = th_ack; 351 } 352 return (0); 353 } 354 355 void 356 tqhash_update_end(struct tailq_hash *hs, struct rack_sendmap *rsm, 357 uint32_t th_ack) 358 { 359 if (hs->max == rsm->r_end) 360 hs->max = th_ack; 361 rsm->r_end = th_ack; 362 } 363