1 #include <sys/cdefs.h> 2 __FBSDID("$FreeBSD$"); 3 4 #include "opt_inet.h" 5 #include "opt_inet6.h" 6 #include "opt_ipsec.h" 7 #include "opt_ratelimit.h" 8 #include "opt_kern_tls.h" 9 #include <sys/param.h> 10 #include <sys/arb.h> 11 #include <sys/module.h> 12 #include <sys/kernel.h> 13 #ifdef TCP_HHOOK 14 #include <sys/hhook.h> 15 #endif 16 #include <sys/lock.h> 17 #include <sys/malloc.h> 18 #include <sys/lock.h> 19 #include <sys/mutex.h> 20 #include <sys/mbuf.h> 21 #include <sys/proc.h> /* for proc0 declaration */ 22 #include <sys/socket.h> 23 #include <sys/socketvar.h> 24 #include <sys/sysctl.h> 25 #include <sys/systm.h> 26 #ifdef STATS 27 #include <sys/qmath.h> 28 #include <sys/tree.h> 29 #include <sys/stats.h> /* Must come after qmath.h and tree.h */ 30 #else 31 #include <sys/tree.h> 32 #endif 33 #include <sys/refcount.h> 34 #include <sys/queue.h> 35 #include <sys/tim_filter.h> 36 #include <sys/smp.h> 37 #include <sys/kthread.h> 38 #include <sys/kern_prefetch.h> 39 #include <sys/protosw.h> 40 #ifdef TCP_ACCOUNTING 41 #include <sys/sched.h> 42 #include <machine/cpu.h> 43 #endif 44 #include <vm/uma.h> 45 46 #include <net/route.h> 47 #include <net/route/nhop.h> 48 #include <net/vnet.h> 49 50 #define TCPSTATES /* for logging */ 51 52 #include <netinet/in.h> 53 #include <netinet/in_kdtrace.h> 54 #include <netinet/in_pcb.h> 55 #include <netinet/ip.h> 56 #include <netinet/ip_icmp.h> /* required for icmp_var.h */ 57 #include <netinet/icmp_var.h> /* for ICMP_BANDLIM */ 58 #include <netinet/ip_var.h> 59 #include <netinet/ip6.h> 60 #include <netinet6/in6_pcb.h> 61 #include <netinet6/ip6_var.h> 62 #include <netinet/tcp.h> 63 #define TCPOUTFLAGS 64 #include <netinet/tcp_fsm.h> 65 #include <netinet/tcp_seq.h> 66 #include <netinet/tcp_timer.h> 67 #include <netinet/tcp_var.h> 68 #include <netinet/tcp_log_buf.h> 69 #include <netinet/tcp_syncache.h> 70 #include <netinet/tcp_hpts.h> 71 #include <netinet/tcp_ratelimit.h> 72 #include <netinet/tcp_accounting.h> 73 #include <netinet/tcpip.h> 74 #include <netinet/cc/cc.h> 75 #include <netinet/cc/cc_newreno.h> 76 #include <netinet/tcp_fastopen.h> 77 #include <netinet/tcp_lro.h> 78 #ifdef NETFLIX_SHARED_CWND 79 #include <netinet/tcp_shared_cwnd.h> 80 #endif 81 #ifdef TCP_OFFLOAD 82 #include <netinet/tcp_offload.h> 83 #endif 84 #ifdef INET6 85 #include <netinet6/tcp6_var.h> 86 #endif 87 #include <netinet/tcp_ecn.h> 88 89 #include <netipsec/ipsec_support.h> 90 91 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 92 #include <netipsec/ipsec.h> 93 #include <netipsec/ipsec6.h> 94 #endif /* IPSEC */ 95 96 #include <netinet/udp.h> 97 #include <netinet/udp_var.h> 98 #include <machine/in_cksum.h> 99 100 #ifdef MAC 101 #include <security/mac/mac_framework.h> 102 #endif 103 #include "sack_filter.h" 104 #include "tcp_rack.h" 105 #include "tailq_hash.h" 106 107 108 struct rack_sendmap * 109 tqhash_min(struct tailq_hash *hs) 110 { 111 struct rack_sendmap *rsm; 112 113 rsm = tqhash_find(hs, hs->min); 114 return(rsm); 115 } 116 117 struct rack_sendmap * 118 tqhash_max(struct tailq_hash *hs) 119 { 120 struct rack_sendmap *rsm; 121 122 rsm = tqhash_find(hs, (hs->max - 1)); 123 return (rsm); 124 } 125 126 int 127 tqhash_empty(struct tailq_hash *hs) 128 { 129 if (hs->count == 0) 130 return(1); 131 return(0); 132 } 133 134 struct rack_sendmap * 135 tqhash_find(struct tailq_hash *hs, uint32_t seq) 136 { 137 struct rack_sendmap *e; 138 int bindex, pbucket, fc = 1; 139 140 if ((SEQ_LT(seq, hs->min)) || 141 (hs->count == 0) || 142 (SEQ_GEQ(seq, hs->max))) { 143 /* Not here */ 144 return (NULL); 145 } 146 bindex = seq / SEQ_BUCKET_SIZE; 147 bindex %= MAX_HASH_ENTRIES; 148 /* Lets look through the bucket it belongs to */ 149 if (TAILQ_EMPTY(&hs->ht[bindex])) { 150 goto look_backwards; 151 } 152 TAILQ_FOREACH(e, &hs->ht[bindex], next) { 153 if (fc == 1) { 154 /* 155 * Special check for when a cum-ack 156 * as moved up over a seq and now its 157 * a bucket behind where it belongs. In 158 * the case of SACKs which create new rsm's 159 * this won't occur. 160 */ 161 if (SEQ_GT(e->r_start, seq)) { 162 goto look_backwards; 163 } 164 fc = 0; 165 } 166 if (SEQ_GEQ(seq, e->r_start) && 167 (SEQ_LT(seq, e->r_end))) { 168 /* Its in this block */ 169 return (e); 170 } 171 } 172 /* Did not find it */ 173 return (NULL); 174 look_backwards: 175 if (bindex == 0) 176 pbucket = MAX_HASH_ENTRIES - 1; 177 else 178 pbucket = bindex - 1; 179 TAILQ_FOREACH_REVERSE(e, &hs->ht[pbucket], rack_head, next) { 180 if (SEQ_GEQ(seq, e->r_start) && 181 (SEQ_LT(seq, e->r_end))) { 182 /* Its in this block */ 183 return (e); 184 } 185 if (SEQ_GEQ(e->r_end, seq)) 186 break; 187 } 188 return (NULL); 189 } 190 191 struct rack_sendmap * 192 tqhash_next(struct tailq_hash *hs, struct rack_sendmap *rsm) 193 { 194 struct rack_sendmap *e; 195 196 e = TAILQ_NEXT(rsm, next); 197 if (e == NULL) { 198 /* Move to next bucket */ 199 int nxt; 200 201 nxt = rsm->bindex + 1; 202 if (nxt >= MAX_HASH_ENTRIES) 203 nxt = 0; 204 e = TAILQ_FIRST(&hs->ht[nxt]); 205 } 206 return(e); 207 } 208 209 struct rack_sendmap * 210 tqhash_prev(struct tailq_hash *hs, struct rack_sendmap *rsm) 211 { 212 struct rack_sendmap *e; 213 214 e = TAILQ_PREV(rsm, rack_head, next); 215 if (e == NULL) { 216 int prev; 217 218 if (rsm->bindex > 0) 219 prev = rsm->bindex - 1; 220 else 221 prev = MAX_HASH_ENTRIES - 1; 222 e = TAILQ_LAST(&hs->ht[prev], rack_head); 223 } 224 return (e); 225 } 226 227 void 228 tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type) 229 { 230 TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next); 231 hs->count--; 232 if (hs->count == 0) { 233 hs->min = hs->max; 234 } else if (type == REMOVE_TYPE_CUMACK) { 235 hs->min = rsm->r_end; 236 } 237 } 238 239 int 240 tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm) 241 { 242 struct rack_sendmap *e, *l; 243 int inserted = 0; 244 uint32_t ebucket; 245 246 if (hs->count > 0) { 247 if ((rsm->r_end - hs->min) > MAX_ALLOWED_SEQ_RANGE) { 248 return (-1); 249 } 250 e = tqhash_find(hs, rsm->r_start); 251 if (e) { 252 return (-2); 253 } 254 } 255 rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE; 256 rsm->bindex %= MAX_HASH_ENTRIES; 257 ebucket = rsm->r_end / SEQ_BUCKET_SIZE; 258 ebucket %= MAX_HASH_ENTRIES; 259 if (ebucket != rsm->bindex) { 260 /* This RSM straddles the bucket boundary */ 261 rsm->r_flags |= RACK_STRADDLE; 262 } else { 263 rsm->r_flags &= ~RACK_STRADDLE; 264 } 265 if (hs->count == 0) { 266 /* Special case */ 267 hs->min = rsm->r_start; 268 hs->max = rsm->r_end; 269 hs->count = 1; 270 } else { 271 hs->count++; 272 if (SEQ_GT(rsm->r_end, hs->max)) 273 hs->max = rsm->r_end; 274 if (SEQ_LT(rsm->r_start, hs->min)) 275 hs->min = rsm->r_start; 276 } 277 /* Check the common case of inserting at the end */ 278 l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head); 279 if ((l == NULL) || (SEQ_GT(rsm->r_start, l->r_start))) { 280 TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next); 281 return (0); 282 } 283 TAILQ_FOREACH(e, &hs->ht[rsm->bindex], next) { 284 if (SEQ_LEQ(rsm->r_start, e->r_start)) { 285 inserted = 1; 286 TAILQ_INSERT_BEFORE(e, rsm, next); 287 break; 288 } 289 } 290 if (inserted == 0) { 291 TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next); 292 } 293 return (0); 294 } 295 296 void 297 tqhash_init(struct tailq_hash *hs) 298 { 299 int i; 300 301 for(i = 0; i < MAX_HASH_ENTRIES; i++) { 302 TAILQ_INIT(&hs->ht[i]); 303 } 304 hs->min = hs->max = 0; 305 hs->count = 0; 306 } 307 308 int 309 tqhash_trim(struct tailq_hash *hs, uint32_t th_ack) 310 { 311 struct rack_sendmap *rsm; 312 313 if (SEQ_LT(th_ack, hs->min)) { 314 /* It can't be behind our current min */ 315 return (-1); 316 } 317 if (SEQ_GEQ(th_ack, hs->max)) { 318 /* It can't be beyond or at our current max */ 319 return (-2); 320 } 321 rsm = tqhash_min(hs); 322 if (rsm == NULL) { 323 /* nothing to trim */ 324 return (-3); 325 } 326 if (SEQ_GEQ(th_ack, rsm->r_end)) { 327 /* 328 * You can't trim all bytes instead 329 * you need to remove it. 330 */ 331 return (-4); 332 } 333 if (SEQ_GT(th_ack, hs->min)) 334 hs->min = th_ack; 335 /* 336 * Should we trim it for the caller? 337 * they may have already which is ok... 338 */ 339 if (SEQ_GT(th_ack, rsm->r_start)) { 340 rsm->r_start = th_ack; 341 } 342 return (0); 343 } 344 345