1 #include <sys/cdefs.h> 2 #include "opt_inet.h" 3 #include "opt_inet6.h" 4 #include "opt_ipsec.h" 5 #include "opt_ratelimit.h" 6 #include "opt_kern_tls.h" 7 #include <sys/param.h> 8 #include <sys/arb.h> 9 #include <sys/module.h> 10 #include <sys/kernel.h> 11 #ifdef TCP_HHOOK 12 #include <sys/hhook.h> 13 #endif 14 #include <sys/lock.h> 15 #include <sys/malloc.h> 16 #include <sys/lock.h> 17 #include <sys/mutex.h> 18 #include <sys/mbuf.h> 19 #include <sys/proc.h> /* for proc0 declaration */ 20 #include <sys/socket.h> 21 #include <sys/socketvar.h> 22 #include <sys/sysctl.h> 23 #include <sys/systm.h> 24 #ifdef STATS 25 #include <sys/qmath.h> 26 #include <sys/tree.h> 27 #include <sys/stats.h> /* Must come after qmath.h and tree.h */ 28 #else 29 #include <sys/tree.h> 30 #endif 31 #include <sys/refcount.h> 32 #include <sys/queue.h> 33 #include <sys/tim_filter.h> 34 #include <sys/smp.h> 35 #include <sys/kthread.h> 36 #include <sys/kern_prefetch.h> 37 #include <sys/protosw.h> 38 #ifdef TCP_ACCOUNTING 39 #include <sys/sched.h> 40 #include <machine/cpu.h> 41 #endif 42 #include <vm/uma.h> 43 44 #include <net/route.h> 45 #include <net/route/nhop.h> 46 #include <net/vnet.h> 47 48 #define TCPSTATES /* for logging */ 49 50 #include <netinet/in.h> 51 #include <netinet/in_kdtrace.h> 52 #include <netinet/in_pcb.h> 53 #include <netinet/ip.h> 54 #include <netinet/ip_var.h> 55 #include <netinet/ip6.h> 56 #include <netinet6/in6_pcb.h> 57 #include <netinet6/ip6_var.h> 58 #include <netinet/tcp.h> 59 #include <netinet/tcp_fsm.h> 60 #include <netinet/tcp_seq.h> 61 #include <netinet/tcp_timer.h> 62 #include <netinet/tcp_var.h> 63 #include <netinet/tcp_log_buf.h> 64 #include <netinet/tcp_syncache.h> 65 #include <netinet/tcp_hpts.h> 66 #include <netinet/tcp_accounting.h> 67 #include <netinet/tcpip.h> 68 #include <netinet/cc/cc.h> 69 #include <netinet/cc/cc_newreno.h> 70 #include <netinet/tcp_fastopen.h> 71 #include <netinet/tcp_lro.h> 72 #ifdef NETFLIX_SHARED_CWND 73 #include <netinet/tcp_shared_cwnd.h> 74 #endif 75 #ifdef TCP_OFFLOAD 76 #include <netinet/tcp_offload.h> 77 #endif 78 #ifdef INET6 79 #include <netinet6/tcp6_var.h> 80 #endif 81 #include <netinet/tcp_ecn.h> 82 83 #include <netipsec/ipsec_support.h> 84 85 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 86 #include <netipsec/ipsec.h> 87 #include <netipsec/ipsec6.h> 88 #endif /* IPSEC */ 89 90 #include <netinet/udp.h> 91 #include <netinet/udp_var.h> 92 #include <machine/in_cksum.h> 93 94 #ifdef MAC 95 #include <security/mac/mac_framework.h> 96 #endif 97 #include "sack_filter.h" 98 #include "tcp_rack.h" 99 #include "tailq_hash.h" 100 #include "opt_global.h" 101 102 103 struct rack_sendmap * 104 tqhash_min(struct tailq_hash *hs) 105 { 106 struct rack_sendmap *rsm; 107 108 rsm = hs->rsm_min; 109 return(rsm); 110 } 111 112 struct rack_sendmap * 113 tqhash_max(struct tailq_hash *hs) 114 { 115 struct rack_sendmap *rsm; 116 117 rsm = hs->rsm_max; 118 return (rsm); 119 } 120 121 int 122 tqhash_empty(struct tailq_hash *hs) 123 { 124 if (hs->count == 0) 125 return(1); 126 return(0); 127 } 128 129 struct rack_sendmap * 130 tqhash_find(struct tailq_hash *hs, uint32_t seq) 131 { 132 struct rack_sendmap *e; 133 int bindex, pbucket, fc = 1; 134 135 if ((SEQ_LT(seq, hs->min)) || 136 (hs->count == 0) || 137 (SEQ_GEQ(seq, hs->max))) { 138 /* Not here */ 139 return (NULL); 140 } 141 bindex = seq / SEQ_BUCKET_SIZE; 142 bindex %= MAX_HASH_ENTRIES; 143 /* Lets look through the bucket it belongs to */ 144 if (TAILQ_EMPTY(&hs->ht[bindex])) { 145 goto look_backwards; 146 } 147 TAILQ_FOREACH(e, &hs->ht[bindex], next) { 148 if (fc == 1) { 149 /* 150 * Special check for when a cum-ack 151 * as moved up over a seq and now its 152 * a bucket behind where it belongs. In 153 * the case of SACKs which create new rsm's 154 * this won't occur. 155 */ 156 if (SEQ_GT(e->r_start, seq)) { 157 goto look_backwards; 158 } 159 fc = 0; 160 } 161 if (SEQ_GEQ(seq, e->r_start) && 162 (SEQ_LT(seq, e->r_end))) { 163 /* Its in this block */ 164 return (e); 165 } 166 } 167 /* Did not find it */ 168 return (NULL); 169 look_backwards: 170 if (bindex == 0) 171 pbucket = MAX_HASH_ENTRIES - 1; 172 else 173 pbucket = bindex - 1; 174 TAILQ_FOREACH_REVERSE(e, &hs->ht[pbucket], rack_head, next) { 175 if (SEQ_GEQ(seq, e->r_start) && 176 (SEQ_LT(seq, e->r_end))) { 177 /* Its in this block */ 178 return (e); 179 } 180 if (SEQ_GEQ(e->r_end, seq)) 181 break; 182 } 183 return (NULL); 184 } 185 186 struct rack_sendmap * 187 tqhash_next(struct tailq_hash *hs, struct rack_sendmap *rsm) 188 { 189 struct rack_sendmap *e; 190 191 e = TAILQ_NEXT(rsm, next); 192 if (e == NULL) { 193 /* Move to next bucket */ 194 int nxt; 195 196 nxt = rsm->bindex + 1; 197 if (nxt >= MAX_HASH_ENTRIES) 198 nxt = 0; 199 e = TAILQ_FIRST(&hs->ht[nxt]); 200 } 201 return(e); 202 } 203 204 struct rack_sendmap * 205 tqhash_prev(struct tailq_hash *hs, struct rack_sendmap *rsm) 206 { 207 struct rack_sendmap *e; 208 209 e = TAILQ_PREV(rsm, rack_head, next); 210 if (e == NULL) { 211 int prev; 212 213 if (rsm->bindex > 0) 214 prev = rsm->bindex - 1; 215 else 216 prev = MAX_HASH_ENTRIES - 1; 217 e = TAILQ_LAST(&hs->ht[prev], rack_head); 218 } 219 return (e); 220 } 221 222 void 223 tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type) 224 { 225 226 hs->count--; 227 if (hs->count == 0) { 228 hs->min = hs->max; 229 hs->rsm_max = hs->rsm_min = NULL; 230 } else if (type == REMOVE_TYPE_CUMACK) { 231 hs->min = rsm->r_end; 232 hs->rsm_min = tqhash_next(hs, rsm); 233 } else if (rsm == hs->rsm_max) { 234 hs->rsm_max = tqhash_prev(hs, rsm); 235 hs->max = hs->rsm_max->r_end; 236 } 237 TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next); 238 } 239 240 int 241 tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm) 242 { 243 struct rack_sendmap *e, *l; 244 int inserted = 0; 245 uint32_t ebucket; 246 247 #ifdef INVARIANTS 248 if (hs->count > 0) { 249 if ((rsm->r_end - hs->min) > MAX_ALLOWED_SEQ_RANGE) { 250 return (-1); 251 } 252 e = tqhash_find(hs, rsm->r_start); 253 if (e) { 254 return (-2); 255 } 256 } 257 #endif 258 rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE; 259 rsm->bindex %= MAX_HASH_ENTRIES; 260 ebucket = rsm->r_end / SEQ_BUCKET_SIZE; 261 ebucket %= MAX_HASH_ENTRIES; 262 if (ebucket != rsm->bindex) { 263 /* This RSM straddles the bucket boundary */ 264 rsm->r_flags |= RACK_STRADDLE; 265 } else { 266 rsm->r_flags &= ~RACK_STRADDLE; 267 } 268 if (hs->count == 0) { 269 /* Special case */ 270 hs->min = rsm->r_start; 271 hs->max = rsm->r_end; 272 hs->rsm_min = hs->rsm_max = rsm; 273 hs->count = 1; 274 } else { 275 hs->count++; 276 if (SEQ_GEQ(rsm->r_end, hs->max)) { 277 hs->max = rsm->r_end; 278 hs->rsm_max = rsm; 279 } if (SEQ_LEQ(rsm->r_start, hs->min)) { 280 hs->min = rsm->r_start; 281 hs->rsm_min = rsm; 282 } 283 } 284 /* Check the common case of inserting at the end */ 285 l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head); 286 if ((l == NULL) || (SEQ_GT(rsm->r_start, l->r_start))) { 287 TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next); 288 return (0); 289 } 290 TAILQ_FOREACH(e, &hs->ht[rsm->bindex], next) { 291 if (SEQ_LEQ(rsm->r_start, e->r_start)) { 292 inserted = 1; 293 TAILQ_INSERT_BEFORE(e, rsm, next); 294 break; 295 } 296 } 297 if (inserted == 0) { 298 TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next); 299 } 300 return (0); 301 } 302 303 void 304 tqhash_init(struct tailq_hash *hs) 305 { 306 int i; 307 308 for(i = 0; i < MAX_HASH_ENTRIES; i++) { 309 TAILQ_INIT(&hs->ht[i]); 310 } 311 hs->min = hs->max = 0; 312 hs->rsm_min = hs->rsm_max = NULL; 313 hs->count = 0; 314 } 315 316 int 317 tqhash_trim(struct tailq_hash *hs, uint32_t th_ack) 318 { 319 struct rack_sendmap *rsm; 320 321 if (SEQ_LT(th_ack, hs->min)) { 322 /* It can't be behind our current min */ 323 return (-1); 324 } 325 if (SEQ_GEQ(th_ack, hs->max)) { 326 /* It can't be beyond or at our current max */ 327 return (-2); 328 } 329 rsm = tqhash_min(hs); 330 if (rsm == NULL) { 331 /* nothing to trim */ 332 return (-3); 333 } 334 if (SEQ_GEQ(th_ack, rsm->r_end)) { 335 /* 336 * You can't trim all bytes instead 337 * you need to remove it. 338 */ 339 return (-4); 340 } 341 if (SEQ_GT(th_ack, hs->min)) 342 hs->min = th_ack; 343 /* 344 * Should we trim it for the caller? 345 * they may have already which is ok... 346 */ 347 if (SEQ_GT(th_ack, rsm->r_start)) { 348 rsm->r_start = th_ack; 349 } 350 return (0); 351 } 352 353 void 354 tqhash_update_end(struct tailq_hash *hs, struct rack_sendmap *rsm, 355 uint32_t th_ack) 356 { 357 if (hs->max == rsm->r_end) 358 hs->max = th_ack; 359 rsm->r_end = th_ack; 360 } 361