1 #include <sys/cdefs.h> 2 #include "opt_inet.h" 3 #include "opt_inet6.h" 4 #include "opt_ipsec.h" 5 #include "opt_ratelimit.h" 6 #include "opt_kern_tls.h" 7 #include <sys/param.h> 8 #include <sys/arb.h> 9 #include <sys/module.h> 10 #include <sys/kernel.h> 11 #ifdef TCP_HHOOK 12 #include <sys/hhook.h> 13 #endif 14 #include <sys/lock.h> 15 #include <sys/malloc.h> 16 #include <sys/lock.h> 17 #include <sys/mutex.h> 18 #include <sys/mbuf.h> 19 #include <sys/proc.h> /* for proc0 declaration */ 20 #include <sys/socket.h> 21 #include <sys/socketvar.h> 22 #include <sys/sysctl.h> 23 #include <sys/systm.h> 24 #ifdef STATS 25 #include <sys/qmath.h> 26 #include <sys/tree.h> 27 #include <sys/stats.h> /* Must come after qmath.h and tree.h */ 28 #else 29 #include <sys/tree.h> 30 #endif 31 #include <sys/refcount.h> 32 #include <sys/queue.h> 33 #include <sys/tim_filter.h> 34 #include <sys/smp.h> 35 #include <sys/kthread.h> 36 #include <sys/kern_prefetch.h> 37 #include <sys/protosw.h> 38 #ifdef TCP_ACCOUNTING 39 #include <sys/sched.h> 40 #include <machine/cpu.h> 41 #endif 42 #include <vm/uma.h> 43 44 #include <net/route.h> 45 #include <net/route/nhop.h> 46 #include <net/vnet.h> 47 48 #define TCPSTATES /* for logging */ 49 50 #include <netinet/in.h> 51 #include <netinet/in_kdtrace.h> 52 #include <netinet/in_pcb.h> 53 #include <netinet/ip.h> 54 #include <netinet/ip_icmp.h> /* required for icmp_var.h */ 55 #include <netinet/icmp_var.h> /* for ICMP_BANDLIM */ 56 #include <netinet/ip_var.h> 57 #include <netinet/ip6.h> 58 #include <netinet6/in6_pcb.h> 59 #include <netinet6/ip6_var.h> 60 #include <netinet/tcp.h> 61 #include <netinet/tcp_fsm.h> 62 #include <netinet/tcp_seq.h> 63 #include <netinet/tcp_timer.h> 64 #include <netinet/tcp_var.h> 65 #include <netinet/tcp_log_buf.h> 66 #include <netinet/tcp_syncache.h> 67 #include <netinet/tcp_hpts.h> 68 #include <netinet/tcp_ratelimit.h> 69 #include <netinet/tcp_accounting.h> 70 #include <netinet/tcpip.h> 71 #include <netinet/cc/cc.h> 72 #include <netinet/cc/cc_newreno.h> 73 #include <netinet/tcp_fastopen.h> 74 #include <netinet/tcp_lro.h> 75 #ifdef NETFLIX_SHARED_CWND 76 #include <netinet/tcp_shared_cwnd.h> 77 #endif 78 #ifdef TCP_OFFLOAD 79 #include <netinet/tcp_offload.h> 80 #endif 81 #ifdef INET6 82 #include <netinet6/tcp6_var.h> 83 #endif 84 #include <netinet/tcp_ecn.h> 85 86 #include <netipsec/ipsec_support.h> 87 88 #if defined(IPSEC) || defined(IPSEC_SUPPORT) 89 #include <netipsec/ipsec.h> 90 #include <netipsec/ipsec6.h> 91 #endif /* IPSEC */ 92 93 #include <netinet/udp.h> 94 #include <netinet/udp_var.h> 95 #include <machine/in_cksum.h> 96 97 #ifdef MAC 98 #include <security/mac/mac_framework.h> 99 #endif 100 #include "sack_filter.h" 101 #include "tcp_rack.h" 102 #include "tailq_hash.h" 103 104 105 struct rack_sendmap * 106 tqhash_min(struct tailq_hash *hs) 107 { 108 struct rack_sendmap *rsm; 109 110 rsm = tqhash_find(hs, hs->min); 111 return(rsm); 112 } 113 114 struct rack_sendmap * 115 tqhash_max(struct tailq_hash *hs) 116 { 117 struct rack_sendmap *rsm; 118 119 rsm = tqhash_find(hs, (hs->max - 1)); 120 return (rsm); 121 } 122 123 int 124 tqhash_empty(struct tailq_hash *hs) 125 { 126 if (hs->count == 0) 127 return(1); 128 return(0); 129 } 130 131 struct rack_sendmap * 132 tqhash_find(struct tailq_hash *hs, uint32_t seq) 133 { 134 struct rack_sendmap *e; 135 int bindex, pbucket, fc = 1; 136 137 if ((SEQ_LT(seq, hs->min)) || 138 (hs->count == 0) || 139 (SEQ_GEQ(seq, hs->max))) { 140 /* Not here */ 141 return (NULL); 142 } 143 bindex = seq / SEQ_BUCKET_SIZE; 144 bindex %= MAX_HASH_ENTRIES; 145 /* Lets look through the bucket it belongs to */ 146 if (TAILQ_EMPTY(&hs->ht[bindex])) { 147 goto look_backwards; 148 } 149 TAILQ_FOREACH(e, &hs->ht[bindex], next) { 150 if (fc == 1) { 151 /* 152 * Special check for when a cum-ack 153 * as moved up over a seq and now its 154 * a bucket behind where it belongs. In 155 * the case of SACKs which create new rsm's 156 * this won't occur. 157 */ 158 if (SEQ_GT(e->r_start, seq)) { 159 goto look_backwards; 160 } 161 fc = 0; 162 } 163 if (SEQ_GEQ(seq, e->r_start) && 164 (SEQ_LT(seq, e->r_end))) { 165 /* Its in this block */ 166 return (e); 167 } 168 } 169 /* Did not find it */ 170 return (NULL); 171 look_backwards: 172 if (bindex == 0) 173 pbucket = MAX_HASH_ENTRIES - 1; 174 else 175 pbucket = bindex - 1; 176 TAILQ_FOREACH_REVERSE(e, &hs->ht[pbucket], rack_head, next) { 177 if (SEQ_GEQ(seq, e->r_start) && 178 (SEQ_LT(seq, e->r_end))) { 179 /* Its in this block */ 180 return (e); 181 } 182 if (SEQ_GEQ(e->r_end, seq)) 183 break; 184 } 185 return (NULL); 186 } 187 188 struct rack_sendmap * 189 tqhash_next(struct tailq_hash *hs, struct rack_sendmap *rsm) 190 { 191 struct rack_sendmap *e; 192 193 e = TAILQ_NEXT(rsm, next); 194 if (e == NULL) { 195 /* Move to next bucket */ 196 int nxt; 197 198 nxt = rsm->bindex + 1; 199 if (nxt >= MAX_HASH_ENTRIES) 200 nxt = 0; 201 e = TAILQ_FIRST(&hs->ht[nxt]); 202 } 203 return(e); 204 } 205 206 struct rack_sendmap * 207 tqhash_prev(struct tailq_hash *hs, struct rack_sendmap *rsm) 208 { 209 struct rack_sendmap *e; 210 211 e = TAILQ_PREV(rsm, rack_head, next); 212 if (e == NULL) { 213 int prev; 214 215 if (rsm->bindex > 0) 216 prev = rsm->bindex - 1; 217 else 218 prev = MAX_HASH_ENTRIES - 1; 219 e = TAILQ_LAST(&hs->ht[prev], rack_head); 220 } 221 return (e); 222 } 223 224 void 225 tqhash_remove(struct tailq_hash *hs, struct rack_sendmap *rsm, int type) 226 { 227 TAILQ_REMOVE(&hs->ht[rsm->bindex], rsm, next); 228 hs->count--; 229 if (hs->count == 0) { 230 hs->min = hs->max; 231 } else if (type == REMOVE_TYPE_CUMACK) { 232 hs->min = rsm->r_end; 233 } 234 } 235 236 int 237 tqhash_insert(struct tailq_hash *hs, struct rack_sendmap *rsm) 238 { 239 struct rack_sendmap *e, *l; 240 int inserted = 0; 241 uint32_t ebucket; 242 243 if (hs->count > 0) { 244 if ((rsm->r_end - hs->min) > MAX_ALLOWED_SEQ_RANGE) { 245 return (-1); 246 } 247 e = tqhash_find(hs, rsm->r_start); 248 if (e) { 249 return (-2); 250 } 251 } 252 rsm->bindex = rsm->r_start / SEQ_BUCKET_SIZE; 253 rsm->bindex %= MAX_HASH_ENTRIES; 254 ebucket = rsm->r_end / SEQ_BUCKET_SIZE; 255 ebucket %= MAX_HASH_ENTRIES; 256 if (ebucket != rsm->bindex) { 257 /* This RSM straddles the bucket boundary */ 258 rsm->r_flags |= RACK_STRADDLE; 259 } else { 260 rsm->r_flags &= ~RACK_STRADDLE; 261 } 262 if (hs->count == 0) { 263 /* Special case */ 264 hs->min = rsm->r_start; 265 hs->max = rsm->r_end; 266 hs->count = 1; 267 } else { 268 hs->count++; 269 if (SEQ_GT(rsm->r_end, hs->max)) 270 hs->max = rsm->r_end; 271 if (SEQ_LT(rsm->r_start, hs->min)) 272 hs->min = rsm->r_start; 273 } 274 /* Check the common case of inserting at the end */ 275 l = TAILQ_LAST(&hs->ht[rsm->bindex], rack_head); 276 if ((l == NULL) || (SEQ_GT(rsm->r_start, l->r_start))) { 277 TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next); 278 return (0); 279 } 280 TAILQ_FOREACH(e, &hs->ht[rsm->bindex], next) { 281 if (SEQ_LEQ(rsm->r_start, e->r_start)) { 282 inserted = 1; 283 TAILQ_INSERT_BEFORE(e, rsm, next); 284 break; 285 } 286 } 287 if (inserted == 0) { 288 TAILQ_INSERT_TAIL(&hs->ht[rsm->bindex], rsm, next); 289 } 290 return (0); 291 } 292 293 void 294 tqhash_init(struct tailq_hash *hs) 295 { 296 int i; 297 298 for(i = 0; i < MAX_HASH_ENTRIES; i++) { 299 TAILQ_INIT(&hs->ht[i]); 300 } 301 hs->min = hs->max = 0; 302 hs->count = 0; 303 } 304 305 int 306 tqhash_trim(struct tailq_hash *hs, uint32_t th_ack) 307 { 308 struct rack_sendmap *rsm; 309 310 if (SEQ_LT(th_ack, hs->min)) { 311 /* It can't be behind our current min */ 312 return (-1); 313 } 314 if (SEQ_GEQ(th_ack, hs->max)) { 315 /* It can't be beyond or at our current max */ 316 return (-2); 317 } 318 rsm = tqhash_min(hs); 319 if (rsm == NULL) { 320 /* nothing to trim */ 321 return (-3); 322 } 323 if (SEQ_GEQ(th_ack, rsm->r_end)) { 324 /* 325 * You can't trim all bytes instead 326 * you need to remove it. 327 */ 328 return (-4); 329 } 330 if (SEQ_GT(th_ack, hs->min)) 331 hs->min = th_ack; 332 /* 333 * Should we trim it for the caller? 334 * they may have already which is ok... 335 */ 336 if (SEQ_GT(th_ack, rsm->r_start)) { 337 rsm->r_start = th_ack; 338 } 339 return (0); 340 } 341 342