/*-
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *	$OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_pf.h"

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/refcount.h>
#include <sys/rwlock.h>
#include <sys/socket.h>

#include <net/if.h>
#include <net/vnet.h>
#include <net/pfvar.h>
#include <net/pf_mtag.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>

#ifdef INET6
#include <netinet/ip6.h>
#endif /* INET6 */

struct pf_frent {
	LIST_ENTRY(pf_frent) fr_next;
	union {
		struct {
			struct ip	*_fr_ip;
			struct mbuf	*_fr_m;
		} _frag;
		struct {
			uint16_t	_fr_off;
			uint16_t	_fr_end;
		} _cache;
	} _u;
};
#define	fr_ip	_u._frag._fr_ip
#define	fr_m	_u._frag._fr_m
#define	fr_off	_u._cache._fr_off
#define	fr_end	_u._cache._fr_end

struct pf_fragment {
	RB_ENTRY(pf_fragment) fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	struct in_addr	fr_src;
	struct in_addr	fr_dst;
	u_int8_t	fr_p;		/* protocol of this fragment */
	u_int8_t	fr_flags;	/* status flags */
#define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this */
#define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004		/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
	u_int16_t	fr_id;		/* fragment id for reassembly */
	u_int16_t	fr_max;		/* fragment data max */
	u_int32_t	fr_timeout;
	LIST_HEAD(, pf_frent) fr_queue;
};

static struct mtx pf_frag_mtx;
#define PF_FRAG_LOCK()		mtx_lock(&pf_frag_mtx)
#define PF_FRAG_UNLOCK()	mtx_unlock(&pf_frag_mtx)
#define PF_FRAG_ASSERT()	mtx_assert(&pf_frag_mtx, MA_OWNED)

VNET_DEFINE(uma_zone_t, pf_state_scrub_z);	/* XXX: shared with pfsync */

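/*
 * Two per-VNET UMA zones back the reassembly code: pf_frag_z holds the
 * per-datagram pf_fragment descriptors, while pf_frent_z holds the
 * individual fragment entries queued on them.
 */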
static VNET_DEFINE(uma_zone_t, pf_frent_z);
#define	V_pf_frent_z	VNET(pf_frent_z)
static VNET_DEFINE(uma_zone_t, pf_frag_z);
#define	V_pf_frag_z	VNET(pf_frag_z)

TAILQ_HEAD(pf_fragqueue, pf_fragment);
TAILQ_HEAD(pf_cachequeue, pf_fragment);
static VNET_DEFINE(struct pf_fragqueue, pf_fragqueue);
#define	V_pf_fragqueue	VNET(pf_fragqueue)
static VNET_DEFINE(struct pf_cachequeue, pf_cachequeue);
#define	V_pf_cachequeue	VNET(pf_cachequeue)
RB_HEAD(pf_frag_tree, pf_fragment);
static VNET_DEFINE(struct pf_frag_tree, pf_frag_tree);
#define	V_pf_frag_tree	VNET(pf_frag_tree)
static VNET_DEFINE(struct pf_frag_tree, pf_cache_tree);
#define	V_pf_cache_tree	VNET(pf_cache_tree)
static int		 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
static void		 pf_free_fragment(struct pf_fragment *);
static void		 pf_remove_fragment(struct pf_fragment *);
static int		 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
			    struct tcphdr *, int, sa_family_t);
#ifdef INET
static void		 pf_ip2key(struct pf_fragment *, struct ip *);
static void		 pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t,
			    u_int8_t);
static void		 pf_flush_fragments(void);
static struct pf_fragment *pf_find_fragment(struct ip *,
			    struct pf_frag_tree *);
static struct mbuf	*pf_reassemble(struct mbuf **, struct pf_fragment **,
			    struct pf_frent *, int);
static struct mbuf	*pf_fragcache(struct mbuf **, struct ip *,
			    struct pf_fragment **, int, int, int *);
#endif /* INET */
#ifdef INET6
static void		 pf_scrub_ip6(struct mbuf **, u_int8_t);
#endif

#define	DPFPRINTF(x) do {				\
	if (V_pf_status.debug >= PF_DEBUG_MISC) {	\
		printf("%s: ", __func__);		\
		printf x ;				\
	}						\
} while(0)

void
pf_normalize_init(void)
{

	V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	V_pf_state_scrub_z = uma_zcreate("pf state scrubs",
	    sizeof(struct pf_state_scrub), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);

	V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z;
	V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
	uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT);
	uma_zone_set_warning(V_pf_frent_z, "PF frag entries limit reached");

	mtx_init(&pf_frag_mtx, "pf fragments", NULL, MTX_DEF);

	TAILQ_INIT(&V_pf_fragqueue);
	TAILQ_INIT(&V_pf_cachequeue);
}

void
pf_normalize_cleanup(void)
{

	uma_zdestroy(V_pf_state_scrub_z);
	uma_zdestroy(V_pf_frent_z);
	uma_zdestroy(V_pf_frag_z);

	mtx_destroy(&pf_frag_mtx);
}

static int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int	diff;

	if ((diff = a->fr_id - b->fr_id))
		return (diff);
	else if ((diff = a->fr_p - b->fr_p))
		return (diff);
	else if (a->fr_src.s_addr < b->fr_src.s_addr)
		return (-1);
	else if (a->fr_src.s_addr > b->fr_src.s_addr)
		return (1);
	else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
		return (-1);
	else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
		return (1);
	return (0);
}
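/*
 * Both fragment queues are kept most-recently-used first, so expiry
 * scans from the tail and stops at the first entry that is still
 * within the fragment timeout.
 */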
void
pf_purge_expired_fragments(void)
{
	struct pf_fragment	*frag;
	u_int32_t		 expire = time_uptime -
				    V_pf_default_rule.timeout[PFTM_FRAG];

	PF_FRAG_LOCK();
	while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
		KASSERT((BUFFER_FRAGMENTS(frag)),
		    ("BUFFER_FRAGMENTS(frag) == 0: %s", __FUNCTION__));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue)) != NULL) {
		KASSERT((!BUFFER_FRAGMENTS(frag)),
		    ("BUFFER_FRAGMENTS(frag) != 0: %s", __FUNCTION__));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
		KASSERT((TAILQ_EMPTY(&V_pf_cachequeue) ||
		    TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue) != frag),
		    ("!(TAILQ_EMPTY() || TAILQ_LAST() == frag): %s",
		    __FUNCTION__));
	}
	PF_FRAG_UNLOCK();
}

#ifdef INET
/*
 * Try to flush old fragments to make space for new ones
 */
static void
pf_flush_fragments(void)
{
	struct pf_fragment	*frag, *cache;
	int			 goal;

	PF_FRAG_ASSERT();

	goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10;
	DPFPRINTF(("trying to free %d frag entries\n", goal));
	while (goal < uma_zone_get_cur(V_pf_frent_z)) {
		frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
		if (frag)
			pf_free_fragment(frag);
		cache = TAILQ_LAST(&V_pf_cachequeue, pf_cachequeue);
		if (cache)
			pf_free_fragment(cache);
		if (frag == NULL && cache == NULL)
			break;
	}
}
#endif /* INET */

/* Frees the fragments and all associated entries */
static void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent	*frent;

	PF_FRAG_ASSERT();

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			uma_zfree(V_pf_frent_z, frent);
		}
	} else {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			KASSERT((LIST_EMPTY(&frag->fr_queue) ||
			    LIST_FIRST(&frag->fr_queue)->fr_off >
			    frent->fr_end),
			    ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
			    " frent->fr_end): %s", __func__));

			uma_zfree(V_pf_frent_z, frent);
		}
	}

	pf_remove_fragment(frag);
}

#ifdef INET
static void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_src.s_addr = ip->ip_src.s_addr;
	key->fr_dst.s_addr = ip->ip_dst.s_addr;
}

static struct pf_fragment *
pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment	 key;
	struct pf_fragment	*frag;

	PF_FRAG_ASSERT();

	pf_ip2key(&key, ip);

	frag = RB_FIND(pf_frag_tree, tree, &key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = time_uptime;
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
			TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
		} else {
			TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
			TAILQ_INSERT_HEAD(&V_pf_cachequeue, frag, frag_next);
		}
	}

	return (frag);
}
#endif /* INET */
/* Removes a fragment from the fragment queue and frees the fragment */
static void
pf_remove_fragment(struct pf_fragment *frag)
{

	PF_FRAG_ASSERT();

	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
		TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
		uma_zfree(V_pf_frag_z, frag);
	} else {
		RB_REMOVE(pf_frag_tree, &V_pf_cache_tree, frag);
		TAILQ_REMOVE(&V_pf_cachequeue, frag, frag_next);
		uma_zfree(V_pf_frag_z, frag);
	}
}

#ifdef INET
#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
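/*
 * Fully buffering reassembly: queue the fragment, trimming any overlap
 * with already-queued fragments, and once the complete datagram is
 * present splice the mbuf chains back into one packet.  Returns the
 * reassembled mbuf, or NULL if fragments are still outstanding or the
 * fragment had to be dropped.
 */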
static struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf	*m = *m0, *m2;
	struct pf_frent	*frea, *next;
	struct pf_frent	*frep = NULL;
	struct ip	*ip = frent->fr_ip;
	int		 hlen = ip->ip_hl << 2;
	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_int16_t	 max = ip_len + off;

	PF_FRAG_ASSERT();
	KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
	    ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = frent->fr_ip->ip_src;
		(*frag)->fr_dst = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = time_uptime;
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag);
		TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	KASSERT((frep != NULL || frea != NULL),
	    ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));

	if (frep != NULL &&
	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
	    4 > off)
	{
		u_int16_t	precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len);
	}

	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next)
	{
		u_int16_t	aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
		    * 4)
		{
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		uma_zfree(V_pf_frent_z, frea);
	}

 insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off))
		{
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);
	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__));
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	uma_zfree(V_pf_frent_z, frent);
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		uma_zfree(V_pf_frent_z, frent);
		m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags;
		m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data;
		m_cat(m, m2);
	}

	while (m->m_pkthdr.csum_data & 0xffff0000)
		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
		    (m->m_pkthdr.csum_data >> 16);
	ip->ip_src = (*frag)->fr_src;
	ip->ip_dst = (*frag)->fr_dst;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

 drop_fragment:
	/* Oops - fail safe - drop packet */
	uma_zfree(V_pf_frent_z, frent);
	m_freem(m);
	return (NULL);
}
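/*
 * Non-buffering fragment cache: instead of holding on to the mbufs,
 * only the byte ranges seen so far are recorded and each fragment is
 * passed through (or dropped) immediately.  Overlaps with ranges that
 * were already passed are trimmed off the packet.
 */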
static struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
    int drop, int *nomem)
{
	struct mbuf	*m = *m0;
	struct pf_frent	*frp, *fra, *cur = NULL;
	int		 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	u_int16_t	 off = ntohs(h->ip_off) << 3;
	u_int16_t	 max = ip_len + off;
	int		 hosed = 0;

	PF_FRAG_ASSERT();
	KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
	    ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));

	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
		if (cur == NULL) {
			uma_zfree(V_pf_frag_z, *frag);
			*frag = NULL;
			goto no_mem;
		}

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = h->ip_src;
		(*frag)->fr_dst = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = time_uptime;

		cur->fr_off = off;
		cur->fr_end = max;
		LIST_INIT(&(*frag)->fr_queue);
		LIST_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);

		RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
		TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_queue, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	KASSERT((frp != NULL || fra != NULL),
	    ("!(frp != NULL || fra != NULL): %s", __FUNCTION__));

	if (frp != NULL) {
		int	precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			frp->fr_end = max;
		} else if (precut > 0) {
			/* The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    max));

			off += precut;
			max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = max;

			if (!drop) {
				/* XXX Optimization opportunity
				 * This is a very heavy way to trim the payload.
				 * we could do it much faster by diddling mbuf
				 * internals but that would be even less legible
				 * than this mbuf magic.  For my next trick,
				 * I'll pull a rabbit out of my laptop.
				 */
				*m0 = m_dup(m, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				/* From KAME Project : We have missed this! */
				m_adj(*m0, (h->ip_hl << 2) -
				    (*m0)->m_pkthdr.len);

				KASSERT(((*m0)->m_next == NULL),
				    ("(*m0)->m_next != NULL: %s",
				    __FUNCTION__));
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}

				h = mtod(m, struct ip *);

				KASSERT(((int)m->m_len ==
				    ntohs(h->ip_len) - precut),
				    ("m->m_len != ntohs(h->ip_len) - precut: %s",
				    __FUNCTION__));
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    max));

			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
			if (cur == NULL)
				goto no_mem;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}
	if (fra != NULL) {
		int	aftercut;
		int	merge = 0;

		aftercut = max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				KASSERT(((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut),
				    ("m->m_len != ntohs(h->ip_len) - aftercut: %s",
				    __FUNCTION__));
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, max, fra->fr_off,
			    fra->fr_end));

			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
			if (cur == NULL)
				goto no_mem;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				uma_zfree(V_pf_frent_z, cur);
				cur = NULL;

			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				KASSERT((cur == NULL), ("cur != NULL: %s",
				    __FUNCTION__));
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				uma_zfree(V_pf_frent_z, frp);
				frp = NULL;

			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

 pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_queue)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_queue)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

 no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

 drop_fragment:

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall fragment\n",
			    h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}
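/*
 * IPv4 entry point for the scrub rules: match the packet against the
 * scrub ruleset, sanity-check the header, and either fully reassemble
 * fragments (the default) or run them through the non-buffering cache
 * when fragment crop/drop is configured.
 */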
int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason,
    struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct pf_frent		*frent;
	struct pf_fragment	*frag = NULL;
	struct ip		*h = mtod(m, struct ip *);
	int			 mff = (ntohs(h->ip_off) & IP_MF);
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max;
	int			 ip_len;
	int			 ip_off;
	int			 tag = -1;

	PF_RULES_RASSERT();

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif, M_GETFIB(m)))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL, M_GETFIB(m)))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->match_tag && !pf_match_tag(m, r, &tag,
		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
			r = TAILQ_NEXT(r, entries);
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/* We're dealing with a fragment now. Don't allow fragments
	 * with IP_DF to enter the cache. If the flag was cleared by
	 * no-df above, fine. Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;
	ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	max = fragoff + ip_len;

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {

		/* Fully buffer all of the fragments */
		PF_FRAG_LOCK();
		frag = pf_find_fragment(h, &V_pf_frag_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
		if (frent == NULL) {
			PF_FRAG_UNLOCK();
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);
		PF_FRAG_UNLOCK();

		if (m == NULL)
			return (PF_DROP);

		/* use mtag from concatenated mbuf chain */
		pd->pf_mtag = pf_find_mtag(m);
#ifdef DIAGNOSTIC
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(1)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				*m0 = NULL;
				goto no_mem;
			}
		}
#endif
		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int	nomem = 0;

		if (dir == PF_OUT && pd->pf_mtag->flags & PF_TAG_FRAGCACHE) {
			/*
			 * Already passed the fragment cache in the
			 * input direction.  If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		PF_FRAG_LOCK();
		frag = pf_find_fragment(h, &V_pf_cache_tree);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}
		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		PF_FRAG_UNLOCK();
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		/* use mtag from copied and trimmed mbuf chain */
		pd->pf_mtag = pf_find_mtag(m);
#ifdef DIAGNOSTIC
		if (pd->pf_mtag == NULL) {
			printf("%s: pf_find_mtag returned NULL(2)\n", __func__);
			if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) {
				m_freem(m);
				*m0 = NULL;
				goto no_mem;
			}
		}
#endif
		if (dir == PF_IN)
			pd->pf_mtag->flags |= PF_TAG_FRAGCACHE;

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

 no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* not missing a return here */

 fragment_pass:
	pf_scrub_ip(&m, r->rule_flag, r->min_ttl, r->set_tos);

	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

 no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
		    1);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
		    1);
	return (PF_DROP);

 bad:
	DPFPRINTF(("dropping bad fragment\n"));

	/* Free associated fragments */
	if (frag != NULL) {
		pf_free_fragment(frag);
		PF_FRAG_UNLOCK();
	}

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, m, AF_INET, dir, *reason, r, NULL, NULL, pd,
		    1);

	return (PF_DROP);
}
#endif

#ifdef INET6
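/*
 * IPv6 counterpart: walk the extension header chain, validating the
 * hop-by-hop options and any jumbo payload option along the way.  Only
 * basic sanity checks are done on fragments; no reassembly is
 * performed here yet.
 */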
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
	int			 off;
	struct ip6_ext		 ext;
	struct ip6_opt		 opt;
	struct ip6_opt_jumbo	 jumbo;
	struct ip6_frag		 frag;
	u_int32_t		 jumbolen = 0, plen;
	u_int16_t		 fragoff = 0;
	int			 optend;
	int			 ooff;
	u_int8_t		 proto;
	int			 terminal;

	PF_RULES_RASSERT();

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt)
			r = r->skip[PF_SKIP_PROTO].ptr;
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip6_src, AF_INET6,
		    r->src.neg, kif, M_GETFIB(m)))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
		    r->dst.neg, NULL, M_GETFIB(m)))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
		goto drop;

	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen, jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof(struct ip6_hdr) + jumbolen !=
					    m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
		goto shortpkt;

	pf_scrub_ip6(&m, r->min_ttl);

	return (PF_PASS);

 fragment:
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
		goto badfrag;

	/* do something about it */
	/* remember to set pd->flags |= PFDESC_IP_REAS */
	return (PF_PASS);

 shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
		    1);
	return (PF_DROP);

 drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
		    1);
	return (PF_DROP);

 badfrag:
	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, m, AF_INET6, dir, *reason, r, NULL, NULL, pd,
		    1);
	return (PF_DROP);
}
#endif /* INET6 */
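/*
 * Stateless TCP normalization: enforce a sane flag combination and
 * header length, clear the reserved bits and a stale urgent pointer,
 * and clamp the MSS option if the matching rule asks for it.
 */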
int
pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
    int off, void *h, struct pf_pdesc *pd)
{
	struct pf_rule	*r, *rm = NULL;
	struct tcphdr	*th = pd->hdr.tcp;
	int		 rewrite = 0;
	u_short		 reason;
	u_int8_t	 flags;
	sa_family_t	 af = pd->af;

	PF_RULES_RASSERT();

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif, M_GETFIB(m)))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (r->src.port_op && !pf_match_port(r->src.port_op,
		    r->src.port[0], r->src.port[1], th->th_sport))
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL, M_GETFIB(m)))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
		    r->dst.port[0], r->dst.port[1], th->th_dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
		    pf_osfp_fingerprint(pd, m, off, th),
		    r->os_fingerprint))
			r = TAILQ_NEXT(r, entries);
		else {
			rm = r;
			break;
		}
	}

	if (rm == NULL || rm->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
		pd->flags |= PFDESC_TCP_NORM;

	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)
			flags &= ~TH_FIN;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
			goto tcp_drop;
	}

	/* Check for illegal header length */
	if (th->th_off < (sizeof(struct tcphdr) >> 2))
		goto tcp_drop;

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)(&th->th_ack + 1);
		th->th_flags = flags;
		th->th_x2 = 0;
		nv = *(u_int16_t *)(&th->th_ack + 1);

		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
		th->th_urp = 0;
		rewrite = 1;
	}

	/* Process options */
	if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af))
		rewrite = 1;

	/* copy back packet headers if we sanitized */
	if (rewrite)
		m_copyback(m, off, sizeof(*th), (caddr_t)th);

	return (PF_PASS);

 tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	if (rm != NULL && r->log)
		PFLOG_PACKET(kif, m, AF_INET, dir, reason, r, NULL, NULL, pd,
		    1);
	return (PF_DROP);
}
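/*
 * Set up the per-peer scrub state.  If the SYN carries a timestamp
 * option, record the initial values and pick a random modulator for
 * the timestamps rewritten later by pf_normalize_tcp_stateful().
 */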
int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
	u_int32_t tsval, tsecr;
	u_int8_t hdr[60];
	u_int8_t *opt;

	KASSERT((src->scrub == NULL),
	    ("pf_normalize_tcp_init: src->scrub != NULL"));

	src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT);
	if (src->scrub == NULL)
		return (1);

	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connection.  They must all set an enabled bit in pfss_flags
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);

	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    htonl(arc4random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}

void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub)
		uma_zfree(V_pf_state_scrub_z, state->src.scrub);
	if (state->dst.scrub)
		uma_zfree(V_pf_state_scrub_z, state->dst.scrub);

	/* Someday... flush the TCP segment reassembly descriptors. */
}
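/*
 * Per-packet stateful TCP scrubbing: enforce the minimum TTL seen on
 * the connection, modulate timestamp values, and apply the PAWS-based
 * timestamp sanity checks described in the comments below.
 */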
int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval uptime;
	u_int32_t tsval, tsecr;
	u_int tsval_from_last;
	u_int8_t hdr[60];
	u_int8_t *opt;
	int copyback = 0;
	int got_ts = 0;

	KASSERT((src->scrub || dst->scrub),
	    ("%s: src->scrub && dst->scrub!", __func__));

	/*
	 * Enforce the minimum TTL seen for this connection.  Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int hlen;
		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/* Modulate the timestamps.  Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh?  Multiple timestamps!? */
					if (V_pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						printf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copyback the options, caller copies back header */
			*writeback = 1;
			m_copyback(m, off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
			    sizeof(struct tcphdr));
		}
	}
	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  That turns out to
	 * be about 24 days before it wraps.  XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_uptime - state->creation > TS_MAX_CONN)) {
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoint's timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval delta_ts;
		int ts_fudge;

		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		delta_ts = uptime;
		timevalsub(&delta_ts, &src->scrub->pfss_last);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);

		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0) ? '3' : ' '));
			DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
			    "idle: %jus %lums\n",
			    tsval, tsecr, tsval_from_last,
			    (uintmax_t)delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u"
			    "\n", dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp.  Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets.  So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}

	/*
	 * We will note if a host sends its data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets
	 * be timestamped.  But I think there are middle-man devices that
	 * hijack TCP streams immediately after the 3whs and don't timestamp
	 * their packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet.  Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				printf("\n");
			}
		}
	}

	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}
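/*
 * Clamp the MSS option to the rule's max-mss value, fixing up the TCP
 * checksum for the rewritten option.  Returns nonzero if the options
 * were modified and had to be copied back.
 */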
static int
pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
    int off, sa_family_t af)
{
	u_int16_t	*mss;
	int		 thoff;
	int		 opt, cnt, optlen = 0;
	int		 rewrite = 0;
	u_char		 opts[TCP_MAXOLEN];
	u_char		*optp = opts;

	thoff = th->th_off << 2;
	cnt = thoff - sizeof(struct tcphdr);

	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
	    NULL, NULL, af))
		return (rewrite);

	for (; cnt > 0; cnt -= optlen, optp += optlen) {
		opt = optp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = optp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_MAXSEG:
			mss = (u_int16_t *)(optp + 2);
			if ((ntohs(*mss)) > r->max_mss) {
				th->th_sum = pf_cksum_fixup(th->th_sum,
				    *mss, htons(r->max_mss), 0);
				*mss = htons(r->max_mss);
				rewrite = 1;
			}
			break;
		default:
			break;
		}
	}

	if (rewrite)
		m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);

	return (rewrite);
}

#ifdef INET
static void
pf_scrub_ip(struct mbuf **m0, u_int32_t flags, u_int8_t min_ttl, u_int8_t tos)
{
	struct mbuf	*m = *m0;
	struct ip	*h = mtod(m, struct ip *);

	/* Clear IP_DF if no-df was requested */
	if (flags & PFRULE_NODF && h->ip_off & htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (min_ttl && h->ip_ttl < min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}

	/* Enforce tos */
	if (flags & PFRULE_SET_TOS) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)h;
		h->ip_tos = tos;
		nv = *(u_int16_t *)h;

		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
	}

	/* random-id, but not for fragments */
	if (flags & PFRULE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
		u_int16_t ip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
}
#endif /* INET */

#ifdef INET6
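/* Enforce a minimum hop limit; mirrors the ttl handling in pf_scrub_ip(). */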
static void
pf_scrub_ip6(struct mbuf **m0, u_int8_t min_ttl)
{
	struct mbuf	*m = *m0;
	struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (min_ttl && h->ip6_hlim < min_ttl)
		h->ip6_hlim = min_ttl;
}
#endif