/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * Copyright 2011-2018 Alexander Bluhm <bluhm@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *	$OpenBSD: pf_norm.c,v 1.114 2009/01/29 14:11:45 henning Exp $
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_pf.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/refcount.h>
#include <sys/socket.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/vnet.h>
#include <net/pfvar.h>
#include <net/if_pflog.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/sctp_constants.h>
#include <netinet/sctp_header.h>

#ifdef INET6
#include <netinet/ip6.h>
#endif /* INET6 */

struct pf_frent {
	TAILQ_ENTRY(pf_frent) fr_next;
	struct mbuf	*fe_m;
	uint16_t	fe_hdrlen;	/* ipv4 header length with ip options
					   ipv6, extension, fragment header */
	uint16_t	fe_extoff;	/* last extension header offset or 0 */
	uint16_t	fe_len;		/* fragment length */
	uint16_t	fe_off;		/* fragment offset */
	uint16_t	fe_mff;		/* more fragment flag */
};

RB_HEAD(pf_frag_tree, pf_fragment);
struct pf_frnode {
	struct pf_addr	fn_src;		/* ip source address */
	struct pf_addr	fn_dst;		/* ip destination address */
	sa_family_t	fn_af;		/* address family */
	u_int8_t	fn_proto;	/* protocol for fragments in fn_tree */
	u_int32_t	fn_fragments;	/* number of entries in fn_tree */

	RB_ENTRY(pf_frnode) fn_entry;
	struct pf_frag_tree fn_tree;	/* matching fragments, lookup by id */
};

struct pf_fragment {
	uint32_t	fr_id;		/* fragment id for reassemble */

	/* pointers to queue element */
	struct pf_frent	*fr_firstoff[PF_FRAG_ENTRY_POINTS];
	/* count entries between pointers */
	uint8_t		fr_entries[PF_FRAG_ENTRY_POINTS];
	RB_ENTRY(pf_fragment) fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	uint32_t	fr_timeout;
	TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
	uint16_t	fr_maxlen;	/* maximum length of single fragment */
	u_int16_t	fr_holes;	/* number of holes in the queue */
	struct pf_frnode *fr_node;	/* ip src/dst/proto/af for fragments */
};

VNET_DEFINE_STATIC(struct mtx, pf_frag_mtx);
#define	V_pf_frag_mtx		VNET(pf_frag_mtx)
#define	PF_FRAG_LOCK()		mtx_lock(&V_pf_frag_mtx)
#define	PF_FRAG_UNLOCK()	mtx_unlock(&V_pf_frag_mtx)
#define	PF_FRAG_ASSERT()	mtx_assert(&V_pf_frag_mtx, MA_OWNED)

VNET_DEFINE(uma_zone_t, pf_state_scrub_z);	/* XXX: shared with pfsync */

VNET_DEFINE_STATIC(uma_zone_t, pf_frent_z);
#define	V_pf_frent_z	VNET(pf_frent_z)
VNET_DEFINE_STATIC(uma_zone_t, pf_frnode_z);
#define	V_pf_frnode_z	VNET(pf_frnode_z)
VNET_DEFINE_STATIC(uma_zone_t, pf_frag_z);
#define	V_pf_frag_z	VNET(pf_frag_z)
VNET_DEFINE(uma_zone_t, pf_anchor_z);
VNET_DEFINE(uma_zone_t, pf_eth_anchor_z);

TAILQ_HEAD(pf_fragqueue, pf_fragment);
TAILQ_HEAD(pf_cachequeue, pf_fragment);
RB_HEAD(pf_frnode_tree, pf_frnode);
VNET_DEFINE_STATIC(struct pf_fragqueue, pf_fragqueue);
#define	V_pf_fragqueue		VNET(pf_fragqueue)
static __inline int	pf_frnode_compare(struct pf_frnode *,
			    struct pf_frnode *);
VNET_DEFINE_STATIC(struct pf_frnode_tree, pf_frnode_tree);
#define	V_pf_frnode_tree	VNET(pf_frnode_tree)
RB_PROTOTYPE(pf_frnode_tree, pf_frnode, fn_entry, pf_frnode_compare);
RB_GENERATE(pf_frnode_tree, pf_frnode, fn_entry, pf_frnode_compare);

static int		 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
static RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
static RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

static void	pf_flush_fragments(void);
static void	pf_free_fragment(struct pf_fragment *);

static struct pf_frent *pf_create_fragment(u_short *);
static int	pf_frent_holes(struct pf_frent *frent);
static struct pf_fragment *pf_find_fragment(struct pf_frnode *, u_int32_t);
static inline int	pf_frent_index(struct pf_frent *);
static int	pf_frent_insert(struct pf_fragment *,
		    struct pf_frent *, struct pf_frent *);
void		pf_frent_remove(struct pf_fragment *,
		    struct pf_frent *);
struct pf_frent	*pf_frent_previous(struct pf_fragment *,
		    struct pf_frent *);
static struct pf_fragment *pf_fillup_fragment(struct pf_frnode *, u_int32_t,
		    struct pf_frent *, u_short *);
static struct mbuf *pf_join_fragment(struct pf_fragment *);
#ifdef INET
static int	pf_reassemble(struct mbuf **, u_short *);
#endif	/* INET */
#ifdef INET6
static int	pf_reassemble6(struct mbuf **,
		    struct ip6_frag *, uint16_t, uint16_t, u_short *);
#endif	/* INET6 */

#ifdef INET
static void
pf_ip2key(struct ip *ip, struct pf_frnode *key)
{

	key->fn_src.v4 = ip->ip_src;
	key->fn_dst.v4 = ip->ip_dst;
	key->fn_af = AF_INET;
	key->fn_proto = ip->ip_p;
}
#endif /* INET */

void
pf_normalize_init(void)
{

	V_pf_frag_z = uma_zcreate("pf frags", sizeof(struct pf_fragment),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	V_pf_frnode_z = uma_zcreate("pf fragment node",
	    sizeof(struct pf_frnode), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	V_pf_frent_z = uma_zcreate("pf frag entries", sizeof(struct pf_frent),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	V_pf_state_scrub_z = uma_zcreate("pf state scrubs",
	    sizeof(struct pf_state_scrub), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);

	mtx_init(&V_pf_frag_mtx, "pf fragments", NULL, MTX_DEF);

	V_pf_limits[PF_LIMIT_FRAGS].zone = V_pf_frent_z;
	V_pf_limits[PF_LIMIT_FRAGS].limit = PFFRAG_FRENT_HIWAT;
	uma_zone_set_max(V_pf_frent_z, PFFRAG_FRENT_HIWAT);
	uma_zone_set_warning(V_pf_frent_z, "PF frag entries limit reached");

	TAILQ_INIT(&V_pf_fragqueue);
}

void
pf_normalize_cleanup(void)
{

	uma_zdestroy(V_pf_state_scrub_z);
	uma_zdestroy(V_pf_frent_z);
	uma_zdestroy(V_pf_frnode_z);
	uma_zdestroy(V_pf_frag_z);

	mtx_destroy(&V_pf_frag_mtx);
}

uint64_t
pf_normalize_get_frag_count(void)
{
	return (uma_zone_get_cur(V_pf_frent_z));
}

static int
pf_frnode_compare(struct pf_frnode *a, struct pf_frnode *b)
{
	int	diff;

	if ((diff = a->fn_proto - b->fn_proto) != 0)
		return (diff);
	if ((diff = a->fn_af - b->fn_af) != 0)
		return (diff);
	if ((diff = pf_addr_cmp(&a->fn_src, &b->fn_src, a->fn_af)) != 0)
		return (diff);
	if ((diff = pf_addr_cmp(&a->fn_dst, &b->fn_dst, a->fn_af)) != 0)
		return (diff);
	return (0);
}

static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int	diff;

	if ((diff = a->fr_id - b->fr_id) != 0)
		return (diff);

	return (0);
}

void
pf_purge_expired_fragments(void)
{
	u_int32_t	expire = time_uptime -
	    V_pf_default_rule.timeout[PFTM_FRAG];

	pf_purge_fragments(expire);
}

void
pf_purge_fragments(uint32_t expire)
{
	struct pf_fragment	*frag;

	PF_FRAG_LOCK();
	while ((frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue)) != NULL) {
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(PF_DEBUG_MISC, "expiring %d(%p)",
		    frag->fr_id, frag);
		pf_free_fragment(frag);
	}

	PF_FRAG_UNLOCK();
}

/*
 * Try to flush old fragments to make space for new ones
 */
static void
pf_flush_fragments(void)
{
	struct pf_fragment	*frag;
	int			 goal;

	PF_FRAG_ASSERT();

	goal = uma_zone_get_cur(V_pf_frent_z) * 9 / 10;
	DPFPRINTF(PF_DEBUG_MISC, "trying to free %d frag entries", goal);
	while (goal < uma_zone_get_cur(V_pf_frent_z)) {
		frag = TAILQ_LAST(&V_pf_fragqueue, pf_fragqueue);
		if (frag)
			pf_free_fragment(frag);
		else
			break;
	}
}

/*
 * Remove a fragment from the fragment queue, free its fragment entries,
 * and free the fragment itself.
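 * The caller must hold the fragment mutex; see the PF_FRAG_ASSERT() below.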
 */
static void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent		*frent;
	struct pf_frnode	*frnode;

	PF_FRAG_ASSERT();

	frnode = frag->fr_node;
	RB_REMOVE(pf_frag_tree, &frnode->fn_tree, frag);
	MPASS(frnode->fn_fragments >= 1);
	frnode->fn_fragments--;
	if (frnode->fn_fragments == 0) {
		MPASS(RB_EMPTY(&frnode->fn_tree));
		RB_REMOVE(pf_frnode_tree, &V_pf_frnode_tree, frnode);
		uma_zfree(V_pf_frnode_z, frnode);
	}

	TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);

	/* Free all fragment entries */
	while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
		TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
		counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1);

		m_freem(frent->fe_m);
		uma_zfree(V_pf_frent_z, frent);
	}

	uma_zfree(V_pf_frag_z, frag);
}

static struct pf_fragment *
pf_find_fragment(struct pf_frnode *key, uint32_t id)
{
	struct pf_fragment	*frag, idkey;
	struct pf_frnode	*frnode;

	PF_FRAG_ASSERT();

	frnode = RB_FIND(pf_frnode_tree, &V_pf_frnode_tree, key);
	counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_SEARCH], 1);
	if (frnode == NULL)
		return (NULL);
	MPASS(frnode->fn_fragments >= 1);
	idkey.fr_id = id;
	frag = RB_FIND(pf_frag_tree, &frnode->fn_tree, &idkey);
	if (frag == NULL)
		return (NULL);
	TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
	TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);

	return (frag);
}

static struct pf_frent *
pf_create_fragment(u_short *reason)
{
	struct pf_frent *frent;

	PF_FRAG_ASSERT();

	frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
	if (frent == NULL) {
		pf_flush_fragments();
		frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (NULL);
		}
	}

	return (frent);
}

/*
 * Calculate the additional holes that were created in the fragment
 * queue by inserting this fragment.  A fragment in the middle
 * creates one more hole by splitting.  For each connected side,
 * it loses one hole.
 * Fragment entry must be in the queue when calling this function.
 */
static int
pf_frent_holes(struct pf_frent *frent)
{
	struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next);
	struct pf_frent *next = TAILQ_NEXT(frent, fr_next);
	int holes = 1;

	if (prev == NULL) {
		if (frent->fe_off == 0)
			holes--;
	} else {
		KASSERT(frent->fe_off != 0, ("frent->fe_off != 0"));
		if (frent->fe_off == prev->fe_off + prev->fe_len)
			holes--;
	}
	if (next == NULL) {
		if (!frent->fe_mff)
			holes--;
	} else {
		KASSERT(frent->fe_mff, ("frent->fe_mff"));
		if (next->fe_off == frent->fe_off + frent->fe_len)
			holes--;
	}
	return holes;
}

static inline int
pf_frent_index(struct pf_frent *frent)
{
	/*
	 * We have an array of 16 entry points to the queue.  A full size
	 * 65535 octet IP packet can have 8192 fragments.  So the queue
	 * traversal length is at most 512 and at most 16 entry points are
	 * checked.  We need 128 additional bytes on a 64 bit architecture.
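	 * For example, a fragment at offset 20480 falls into entry point
	 * 20480 / 4096 = 5.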
	 */
	CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) ==
	    16 - 1);
	CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1);

	return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS);
}

static int
pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent,
    struct pf_frent *prev)
{
	int	index;

	CTASSERT(PF_FRAG_ENTRY_LIMIT <= 0xff);

	/*
	 * A packet has at most 65536 octets.  With 16 entry points, each one
	 * spans 4096 octets.  We limit these to 64 fragments each, which
	 * means on average every fragment must have at least 64 octets.
	 */
	index = pf_frent_index(frent);
	if (frag->fr_entries[index] >= PF_FRAG_ENTRY_LIMIT)
		return ENOBUFS;
	frag->fr_entries[index]++;

	if (prev == NULL) {
		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
	} else {
		KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off,
		    ("overlapping fragment"));
		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
	}
	counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_INSERT], 1);

	if (frag->fr_firstoff[index] == NULL) {
		KASSERT(prev == NULL || pf_frent_index(prev) < index,
		    ("prev == NULL || pf_frent_index(prev) < index"));
		frag->fr_firstoff[index] = frent;
	} else {
		if (frent->fe_off < frag->fr_firstoff[index]->fe_off) {
			KASSERT(prev == NULL || pf_frent_index(prev) < index,
			    ("prev == NULL || pf_frent_index(prev) < index"));
			frag->fr_firstoff[index] = frent;
		} else {
			KASSERT(prev != NULL, ("prev != NULL"));
			KASSERT(pf_frent_index(prev) == index,
			    ("pf_frent_index(prev) == index"));
		}
	}

	frag->fr_holes += pf_frent_holes(frent);

	return 0;
}

void
pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent)
{
#ifdef INVARIANTS
	struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next);
#endif /* INVARIANTS */
	struct pf_frent *next = TAILQ_NEXT(frent, fr_next);
	int index;

	frag->fr_holes -= pf_frent_holes(frent);

	index = pf_frent_index(frent);
	KASSERT(frag->fr_firstoff[index] != NULL, ("frent not found"));
	if (frag->fr_firstoff[index]->fe_off == frent->fe_off) {
		if (next == NULL) {
			frag->fr_firstoff[index] = NULL;
		} else {
			KASSERT(frent->fe_off + frent->fe_len <= next->fe_off,
			    ("overlapping fragment"));
			if (pf_frent_index(next) == index) {
				frag->fr_firstoff[index] = next;
			} else {
				frag->fr_firstoff[index] = NULL;
			}
		}
	} else {
		KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off,
		    ("frag->fr_firstoff[index]->fe_off < frent->fe_off"));
		KASSERT(prev != NULL, ("prev != NULL"));
		KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off,
		    ("overlapping fragment"));
		KASSERT(pf_frent_index(prev) == index,
		    ("pf_frent_index(prev) == index"));
	}

	TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
	counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1);

	KASSERT(frag->fr_entries[index] > 0, ("No fragments remaining"));
	frag->fr_entries[index]--;
}

struct pf_frent *
pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent)
{
	struct pf_frent *prev, *next;
	int index;

	/*
	 * If there are no fragments after frag, take the final one.  Assume
	 * that the global queue is not empty.
	 */
	prev = TAILQ_LAST(&frag->fr_queue, pf_fragq);
	KASSERT(prev != NULL, ("prev != NULL"));
	if (prev->fe_off <= frent->fe_off)
		return prev;
	/*
	 * We want to find a fragment entry that is before frent, but still
	 * close to it.  Find the first fragment entry that is in the same
	 * entry point or in the first entry point after that.  As we have
	 * already checked that there are entries behind frent, this will
	 * succeed.
	 */
	for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS;
	    index++) {
		prev = frag->fr_firstoff[index];
		if (prev != NULL)
			break;
	}
	KASSERT(prev != NULL, ("prev != NULL"));
	/*
	 * In prev we may have a fragment from the same entry point that is
	 * before frent, or one that is just one position behind frent.
	 * In the latter case, we go back one step and have the predecessor.
	 * There may be none if the new fragment will be the first one.
	 */
	if (prev->fe_off > frent->fe_off) {
		prev = TAILQ_PREV(prev, pf_fragq, fr_next);
		if (prev == NULL)
			return NULL;
		KASSERT(prev->fe_off <= frent->fe_off,
		    ("prev->fe_off <= frent->fe_off"));
		return prev;
	}
	/*
	 * Here prev is the first fragment of the entry point.  The offset
	 * of frent is behind it.  Find the closest previous fragment.
	 */
	for (next = TAILQ_NEXT(prev, fr_next); next != NULL;
	    next = TAILQ_NEXT(next, fr_next)) {
		if (next->fe_off > frent->fe_off)
			break;
		prev = next;
	}
	return prev;
}

static struct pf_fragment *
pf_fillup_fragment(struct pf_frnode *key, uint32_t id,
    struct pf_frent *frent, u_short *reason)
{
	struct pf_frent		*after, *next, *prev;
	struct pf_fragment	*frag;
	struct pf_frnode	*frnode;
	uint16_t		total;

	PF_FRAG_ASSERT();

	/* No empty fragments. */
	if (frent->fe_len == 0) {
		DPFPRINTF(PF_DEBUG_MISC, "bad fragment: len 0");
		goto bad_fragment;
	}

	/* All fragments are 8 byte aligned. */
	if (frent->fe_mff && (frent->fe_len & 0x7)) {
		DPFPRINTF(PF_DEBUG_MISC, "bad fragment: mff and len %d",
		    frent->fe_len);
		goto bad_fragment;
	}

	/* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */
	if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
		DPFPRINTF(PF_DEBUG_MISC, "bad fragment: max packet %d",
		    frent->fe_off + frent->fe_len);
		goto bad_fragment;
	}

	if (key->fn_af == AF_INET)
		DPFPRINTF(PF_DEBUG_MISC, "reass frag %d @ %d-%d\n",
		    id, frent->fe_off, frent->fe_off + frent->fe_len);
	else
		DPFPRINTF(PF_DEBUG_MISC, "reass frag %#08x @ %d-%d",
		    id, frent->fe_off, frent->fe_off + frent->fe_len);

	/* Fully buffer all of the fragments in this fragment queue. */
	frag = pf_find_fragment(key, id);

	/* Create a new reassembly queue for this packet. */
	if (frag == NULL) {
		frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
		if (frag == NULL) {
			pf_flush_fragments();
			frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
			if (frag == NULL) {
				REASON_SET(reason, PFRES_MEMORY);
				goto drop_fragment;
			}
		}

		frnode = RB_FIND(pf_frnode_tree, &V_pf_frnode_tree, key);
		if (frnode == NULL) {
			frnode = uma_zalloc(V_pf_frnode_z, M_NOWAIT);
			if (frnode == NULL) {
				pf_flush_fragments();
				frnode = uma_zalloc(V_pf_frnode_z, M_NOWAIT);
				if (frnode == NULL) {
					REASON_SET(reason, PFRES_MEMORY);
					uma_zfree(V_pf_frag_z, frag);
					goto drop_fragment;
				}
			}
			*frnode = *key;
			RB_INIT(&frnode->fn_tree);
			frnode->fn_fragments = 0;
		}
		memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff));
		memset(frag->fr_entries, 0, sizeof(frag->fr_entries));
		frag->fr_timeout = time_uptime;
		TAILQ_INIT(&frag->fr_queue);
		frag->fr_maxlen = frent->fe_len;
		frag->fr_holes = 1;

		frag->fr_id = id;
		frag->fr_node = frnode;
		/* RB_INSERT cannot fail as pf_find_fragment() found nothing */
		RB_INSERT(pf_frag_tree, &frnode->fn_tree, frag);
		frnode->fn_fragments++;
		if (frnode->fn_fragments == 1)
			RB_INSERT(pf_frnode_tree, &V_pf_frnode_tree, frnode);

		TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);

		/* We do not have a previous fragment, cannot fail. */
		pf_frent_insert(frag, frent, NULL);

		return (frag);
	}

	KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue"));
	MPASS(frag->fr_node);

	/* Remember maximum fragment len for refragmentation. */
	if (frent->fe_len > frag->fr_maxlen)
		frag->fr_maxlen = frent->fe_len;

	/* Maximum data we have seen already. */
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;

	/* Non terminal fragments must have more fragments flag. */
	if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
		goto free_ipv6_fragment;

	/* Check if we saw the last fragment already.
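	 * Either way, a fragment that conflicts with the already known end
	 * of the datagram is rejected.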
	 */
	if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
		if (frent->fe_off + frent->fe_len > total ||
		    (frent->fe_off + frent->fe_len == total && frent->fe_mff))
			goto free_ipv6_fragment;
	} else {
		if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
			goto free_ipv6_fragment;
	}

	/* Find neighbors for newly inserted fragment */
	prev = pf_frent_previous(frag, frent);
	if (prev == NULL) {
		after = TAILQ_FIRST(&frag->fr_queue);
		KASSERT(after != NULL, ("after != NULL"));
	} else {
		after = TAILQ_NEXT(prev, fr_next);
	}

	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
		uint16_t precut;

		if (frag->fr_node->fn_af == AF_INET6)
			goto free_fragment;

		precut = prev->fe_off + prev->fe_len - frent->fe_off;
		if (precut >= frent->fe_len) {
			DPFPRINTF(PF_DEBUG_MISC, "new frag overlapped");
			goto drop_fragment;
		}
		DPFPRINTF(PF_DEBUG_MISC, "frag head overlap %d", precut);
		m_adj(frent->fe_m, precut);
		frent->fe_off += precut;
		frent->fe_len -= precut;
	}

	for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
	    after = next) {
		uint16_t aftercut;

		aftercut = frent->fe_off + frent->fe_len - after->fe_off;
		if (aftercut < after->fe_len) {
			DPFPRINTF(PF_DEBUG_MISC, "frag tail overlap %d",
			    aftercut);
			m_adj(after->fe_m, aftercut);
			/* Fragment may switch queue as fe_off changes */
			pf_frent_remove(frag, after);
			after->fe_off += aftercut;
			after->fe_len -= aftercut;
			/* Insert into correct queue */
			if (pf_frent_insert(frag, after, prev)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    "fragment requeue limit exceeded");
				m_freem(after->fe_m);
				uma_zfree(V_pf_frent_z, after);
				/* There is no way to recover */
				goto free_fragment;
			}
			break;
		}

		/* This fragment is completely overlapped, lose it. */
		DPFPRINTF(PF_DEBUG_MISC, "old frag overlapped");
		next = TAILQ_NEXT(after, fr_next);
		pf_frent_remove(frag, after);
		m_freem(after->fe_m);
		uma_zfree(V_pf_frent_z, after);
	}

	/* If part of the queue gets too long, there is no way to recover. */
	if (pf_frent_insert(frag, frent, prev)) {
		DPFPRINTF(PF_DEBUG_MISC, "fragment queue limit exceeded");
		goto bad_fragment;
	}

	return (frag);

free_ipv6_fragment:
	if (frag->fr_node->fn_af == AF_INET)
		goto bad_fragment;
free_fragment:
	/*
	 * RFC 5722, Errata 3089:  When reassembling an IPv6 datagram, if one
	 * or more of its constituent fragments is determined to be an
	 * overlapping fragment, the entire datagram (and any constituent
	 * fragments) MUST be silently discarded.
	 */
	DPFPRINTF(PF_DEBUG_MISC, "flush overlapping fragments");
	pf_free_fragment(frag);

bad_fragment:
	REASON_SET(reason, PFRES_FRAG);
drop_fragment:
	uma_zfree(V_pf_frent_z, frent);
	return (NULL);
}

static struct mbuf *
pf_join_fragment(struct pf_fragment *frag)
{
	struct mbuf	*m, *m2;
	struct pf_frent	*frent;

	frent = TAILQ_FIRST(&frag->fr_queue);
	TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
	counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1);

	m = frent->fe_m;
	if ((frent->fe_hdrlen + frent->fe_len) < m->m_pkthdr.len)
		m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len);
	uma_zfree(V_pf_frent_z, frent);
	while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
		TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
		counter_u64_add(V_pf_status.ncounters[NCNT_FRAG_REMOVALS], 1);

		m2 = frent->fe_m;
		/* Strip off ip header. */
		m_adj(m2, frent->fe_hdrlen);
		/* Strip off any trailing bytes. */
		if (frent->fe_len < m2->m_pkthdr.len)
			m_adj(m2, frent->fe_len - m2->m_pkthdr.len);

		uma_zfree(V_pf_frent_z, frent);
		m_cat(m, m2);
	}

	/* Remove from fragment queue. */
	pf_free_fragment(frag);

	return (m);
}

#ifdef INET
static int
pf_reassemble(struct mbuf **m0, u_short *reason)
{
	struct mbuf		*m = *m0;
	struct ip		*ip = mtod(m, struct ip *);
	struct pf_frent		*frent;
	struct pf_fragment	*frag;
	struct m_tag		*mtag;
	struct pf_fragment_tag	*ftag;
	struct pf_frnode	key;
	uint16_t		total, hdrlen;
	uint32_t		frag_id;
	uint16_t		maxlen;

	/* Get an entry for the fragment queue */
	if ((frent = pf_create_fragment(reason)) == NULL)
		return (PF_DROP);

	frent->fe_m = m;
	frent->fe_hdrlen = ip->ip_hl << 2;
	frent->fe_extoff = 0;
	frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
	frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	frent->fe_mff = ntohs(ip->ip_off) & IP_MF;

	pf_ip2key(ip, &key);

	if ((frag = pf_fillup_fragment(&key, ip->ip_id, frent, reason)) == NULL)
		return (PF_DROP);

	/* The mbuf is part of the fragment entry, no direct free or access */
	m = *m0 = NULL;

	if (frag->fr_holes) {
		DPFPRINTF(PF_DEBUG_MISC, "frag %d, holes %d",
		    frag->fr_id, frag->fr_holes);
		return (PF_PASS);	/* drop because *m0 is NULL, no error */
	}

	/* We have all the data */
	frent = TAILQ_FIRST(&frag->fr_queue);
	KASSERT(frent != NULL, ("frent != NULL"));
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
	hdrlen = frent->fe_hdrlen;

	maxlen = frag->fr_maxlen;
	frag_id = frag->fr_id;
	m = *m0 = pf_join_fragment(frag);
	frag = NULL;

	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m = *m0; m; m = m->m_next)
			plen += m->m_len;
		m = *m0;
		m->m_pkthdr.len = plen;
	}

	if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED,
	    sizeof(struct pf_fragment_tag), M_NOWAIT)) == NULL) {
		REASON_SET(reason, PFRES_SHORT);
		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
		return (PF_DROP);
	}
	ftag = (struct pf_fragment_tag *)(mtag + 1);
	ftag->ft_hdrlen = hdrlen;
	ftag->ft_extoff = 0;
	ftag->ft_maxlen = maxlen;
	ftag->ft_id = frag_id;
	m_tag_prepend(m, mtag);

	ip = mtod(m, struct ip *);
	ip->ip_sum = pf_cksum_fixup(ip->ip_sum, ip->ip_len,
	    htons(hdrlen + total), 0);
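	/*
	 * pf_cksum_fixup() patches the header checksum incrementally, so the
	 * length and offset fields can be rewritten without a full
	 * recalculation.
	 */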
	ip->ip_len = htons(hdrlen + total);
	ip->ip_sum = pf_cksum_fixup(ip->ip_sum, ip->ip_off,
	    ip->ip_off & ~(IP_MF|IP_OFFMASK), 0);
	ip->ip_off &= ~(IP_MF|IP_OFFMASK);

	if (hdrlen + total > IP_MAXPACKET) {
		DPFPRINTF(PF_DEBUG_MISC, "drop: too big: %d", total);
		ip->ip_len = 0;
		REASON_SET(reason, PFRES_SHORT);
		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
		return (PF_DROP);
	}

	DPFPRINTF(PF_DEBUG_MISC, "complete: %p(%d)", m, ntohs(ip->ip_len));
	return (PF_PASS);
}
#endif /* INET */

#ifdef INET6
static int
pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr,
    uint16_t hdrlen, uint16_t extoff, u_short *reason)
{
	struct mbuf		*m = *m0;
	struct ip6_hdr		*ip6 = mtod(m, struct ip6_hdr *);
	struct pf_frent		*frent;
	struct pf_fragment	*frag;
	struct pf_frnode	key;
	struct m_tag		*mtag;
	struct pf_fragment_tag	*ftag;
	int			off;
	uint32_t		frag_id;
	uint16_t		total, maxlen;
	uint8_t			proto;

	PF_FRAG_LOCK();

	/* Get an entry for the fragment queue. */
	if ((frent = pf_create_fragment(reason)) == NULL) {
		PF_FRAG_UNLOCK();
		return (PF_DROP);
	}

	frent->fe_m = m;
	frent->fe_hdrlen = hdrlen;
	frent->fe_extoff = extoff;
	frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
	frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
	frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;

	key.fn_src.v6 = ip6->ip6_src;
	key.fn_dst.v6 = ip6->ip6_dst;
	key.fn_af = AF_INET6;
	/* Only the first fragment's protocol is relevant. */
	key.fn_proto = 0;

	if ((frag = pf_fillup_fragment(&key, fraghdr->ip6f_ident, frent, reason)) == NULL) {
		PF_FRAG_UNLOCK();
		return (PF_DROP);
	}

	/* The mbuf is part of the fragment entry, no direct free or access. */
	m = *m0 = NULL;

	if (frag->fr_holes) {
		DPFPRINTF(PF_DEBUG_MISC, "frag %d, holes %d", frag->fr_id,
		    frag->fr_holes);
		PF_FRAG_UNLOCK();
		return (PF_PASS);	/* Drop because *m0 is NULL, no error. */
	}

	/* We have all the data. */
	frent = TAILQ_FIRST(&frag->fr_queue);
	KASSERT(frent != NULL, ("frent != NULL"));
	extoff = frent->fe_extoff;
	maxlen = frag->fr_maxlen;
	frag_id = frag->fr_id;
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
	hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);

	m = *m0 = pf_join_fragment(frag);
	frag = NULL;

	PF_FRAG_UNLOCK();

	/* Take protocol from first fragment header.
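	 * It is read from the ip6f_nxt field of that fragment's fragment
	 * header, which is deleted below.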
	 */
	m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off);
	KASSERT(m, ("%s: short mbuf chain", __func__));
	proto = *(mtod(m, uint8_t *) + off);
	m = *m0;

	/* Delete frag6 header */
	if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0)
		goto fail;

	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m = *m0; m; m = m->m_next)
			plen += m->m_len;
		m = *m0;
		m->m_pkthdr.len = plen;
	}

	if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED,
	    sizeof(struct pf_fragment_tag), M_NOWAIT)) == NULL)
		goto fail;
	ftag = (struct pf_fragment_tag *)(mtag + 1);
	ftag->ft_hdrlen = hdrlen;
	ftag->ft_extoff = extoff;
	ftag->ft_maxlen = maxlen;
	ftag->ft_id = frag_id;
	m_tag_prepend(m, mtag);

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
	if (extoff) {
		/* Write protocol into next field of last extension header. */
		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
		    &off);
		KASSERT(m, ("%s: short mbuf chain", __func__));
		*(mtod(m, char *) + off) = proto;
		m = *m0;
	} else
		ip6->ip6_nxt = proto;

	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
		DPFPRINTF(PF_DEBUG_MISC, "drop: too big: %d", total);
		ip6->ip6_plen = 0;
		REASON_SET(reason, PFRES_SHORT);
		/* PF_DROP requires a valid mbuf *m0 in pf_test6(). */
		return (PF_DROP);
	}

	DPFPRINTF(PF_DEBUG_MISC, "complete: %p(%d)", m,
	    ntohs(ip6->ip6_plen));
	return (PF_PASS);

fail:
	REASON_SET(reason, PFRES_MEMORY);
	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. */
	return (PF_DROP);
}
#endif /* INET6 */

#ifdef INET6
int
pf_max_frag_size(struct mbuf *m)
{
	struct m_tag *tag;
	struct pf_fragment_tag *ftag;

	tag = m_tag_find(m, PACKET_TAG_PF_REASSEMBLED, NULL);
	if (tag == NULL)
		return (m->m_pkthdr.len);

	ftag = (struct pf_fragment_tag *)(tag + 1);

	return (ftag->ft_maxlen);
}

int
pf_refragment6(struct ifnet *ifp, struct mbuf **m0, struct m_tag *mtag,
    struct ifnet *rt, bool forward)
{
	struct mbuf		*m = *m0, *t;
	struct ip6_hdr		*hdr;
	struct pf_fragment_tag	*ftag = (struct pf_fragment_tag *)(mtag + 1);
	struct pf_pdesc		 pd;
	uint32_t		 frag_id;
	uint16_t		 hdrlen, extoff, maxlen;
	uint8_t			 proto;
	int			 error, action;

	hdrlen = ftag->ft_hdrlen;
	extoff = ftag->ft_extoff;
	maxlen = ftag->ft_maxlen;
	frag_id = ftag->ft_id;
	m_tag_delete(m, mtag);
	mtag = NULL;
	ftag = NULL;

	if (extoff) {
		int off;

		/* Use protocol from next field of last extension header */
		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
		    &off);
		KASSERT((m != NULL), ("pf_refragment6: short mbuf chain"));
		proto = *(mtod(m, uint8_t *) + off);
		*(mtod(m, char *) + off) = IPPROTO_FRAGMENT;
		m = *m0;
	} else {
		hdr = mtod(m, struct ip6_hdr *);
		proto = hdr->ip6_nxt;
		hdr->ip6_nxt = IPPROTO_FRAGMENT;
	}

	/* In case of link-local traffic we'll need a scope set. */
	hdr = mtod(m, struct ip6_hdr *);

	in6_setscope(&hdr->ip6_src, ifp, NULL);
	in6_setscope(&hdr->ip6_dst, ifp, NULL);

	/* The MTU must be a multiple of 8 bytes, or we risk doing the
	 * fragmentation wrong.
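	 * IPv6 fragment offsets are expressed in units of 8 octets.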
	 */
	maxlen = maxlen & ~7;

	/*
	 * Maxlen may be less than 8 if there was only a single
	 * fragment.  As it was fragmented before, add a fragment
	 * header also for a single fragment.  If total or maxlen
	 * is less than 8, ip6_fragment() will return EMSGSIZE and
	 * we drop the packet.
	 */
	error = ip6_fragment(ifp, m, hdrlen, proto, maxlen, frag_id);
	m = (*m0)->m_nextpkt;
	(*m0)->m_nextpkt = NULL;
	if (error == 0) {
		/* The first mbuf contains the unfragmented packet. */
		m_freem(*m0);
		*m0 = NULL;
		action = PF_PASS;
	} else {
		/* Drop expects an mbuf to free. */
		DPFPRINTF(PF_DEBUG_MISC, "refragment error %d", error);
		action = PF_DROP;
	}
	for (; m; m = t) {
		t = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m->m_flags |= M_SKIP_FIREWALL;
		memset(&pd, 0, sizeof(pd));
		pd.pf_mtag = pf_find_mtag(m);
		if (error != 0) {
			m_freem(m);
			continue;
		}
		if (rt != NULL) {
			struct sockaddr_in6	dst;
			hdr = mtod(m, struct ip6_hdr *);

			bzero(&dst, sizeof(dst));
			dst.sin6_family = AF_INET6;
			dst.sin6_len = sizeof(dst);
			dst.sin6_addr = hdr->ip6_dst;

			if (m->m_pkthdr.len <= if_getmtu(ifp)) {
				nd6_output_ifp(rt, rt, m, &dst, NULL);
			} else {
				in6_ifstat_inc(ifp, ifs6_in_toobig);
				icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0,
				    if_getmtu(ifp));
			}
		} else if (forward) {
			MPASS(m->m_pkthdr.rcvif != NULL);
			ip6_forward(m, 0);
		} else {
			(void)ip6_output(m, NULL, NULL, 0, NULL, NULL,
			    NULL);
		}
	}

	return (action);
}
#endif /* INET6 */

#ifdef INET
int
pf_normalize_ip(u_short *reason, struct pf_pdesc *pd)
{
	struct pf_krule		*r;
	struct ip		*h = mtod(pd->m, struct ip *);
	int			 mff = (ntohs(h->ip_off) & IP_MF);
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max;
	int			 ip_len;
	int			 tag = -1;
	int			 verdict;
	bool			 scrub_compat;

	PF_RULES_RASSERT();

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	/*
	 * Check if there are any scrub rules, matching or not.
	 * Lack of scrub rules means:
	 *  - enforced packet normalization operation just like in OpenBSD
	 *  - fragment reassembly depends on V_pf_status.reass
	 * With scrub rules:
	 *  - packet normalization is performed if there is a matching scrub rule
	 *  - fragment reassembly is performed if the matching rule has no
	 *    PFRULE_FRAGMENT_NOREASS flag
	 */
	scrub_compat = (r != NULL);
	while (r != NULL) {
		pf_counter_u64_add(&r->evaluations, 1);
		if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP];
		else if (r->direction && r->direction != pd->dir)
			r = r->skip[PF_SKIP_DIR];
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF];
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO];
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, pd->kif, M_GETFIB(pd->m)))
			r = r->skip[PF_SKIP_SRC_ADDR];
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL, M_GETFIB(pd->m)))
			r = r->skip[PF_SKIP_DST_ADDR];
		else if (r->match_tag && !pf_match_tag(pd->m, r, &tag,
		    pd->pf_mtag ?
		    pd->pf_mtag->tag : 0))
			r = TAILQ_NEXT(r, entries);
		else
			break;
	}

	if (scrub_compat) {
		/* With scrub rules present IPv4 normalization happens only
		 * if one of the rules has matched and it's not a "no scrub"
		 * rule */
		if (r == NULL || r->action == PF_NOSCRUB)
			return (PF_PASS);

		pf_counter_u64_critical_enter();
		pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1);
		pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len);
		pf_counter_u64_critical_exit();
		pf_rule_to_actions(r, &pd->act);
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip)) {
		REASON_SET(reason, PFRES_NORM);
		goto drop;
	}

	if (hlen > ntohs(h->ip_len)) {
		REASON_SET(reason, PFRES_NORM);
		goto drop;
	}

	/* Clear IP_DF if the rule uses the no-df option or we're in no-df mode */
	if (((!scrub_compat && V_pf_status.reass & PF_REASS_NODF) ||
	    (r != NULL && r->rule_flag & PFRULE_NODF)) &&
	    (h->ip_off & htons(IP_DF))
	    ) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/* We're dealing with a fragment now.  Don't allow fragments
	 * with IP_DF to enter the cache.  If the flag was cleared by
	 * no-df above, fine.  Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(PF_DEBUG_MISC, "IP_DF");
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(PF_DEBUG_MISC, "mff and %d", ip_len);
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(PF_DEBUG_MISC, "max packet %d", fragoff + ip_len);
		goto bad;
	}

	if ((!scrub_compat && V_pf_status.reass) ||
	    (r != NULL && !(r->rule_flag & PFRULE_FRAGMENT_NOREASS))
	    ) {
		max = fragoff + ip_len;

		/* Fully buffer all of the fragments
		 * Might return a completely reassembled mbuf, or NULL */
		PF_FRAG_LOCK();
		DPFPRINTF(PF_DEBUG_MISC, "reass frag %d @ %d-%d",
		    h->ip_id, fragoff, max);
		verdict = pf_reassemble(&pd->m, reason);
		PF_FRAG_UNLOCK();

		if (verdict != PF_PASS)
			return (PF_DROP);

		if (pd->m == NULL)
			return (PF_DROP);

		h = mtod(pd->m, struct ip *);
		pd->tot_len = ntohs(h->ip_len);

no_fragment:
		/* At this point, only IP_DF is allowed in ip_off */
		if (h->ip_off & ~htons(IP_DF)) {
			u_int16_t ip_off = h->ip_off;

			h->ip_off &= htons(IP_DF);
			h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
		}
	}

	return (PF_PASS);

bad:
	DPFPRINTF(PF_DEBUG_MISC, "dropping bad fragment");
	REASON_SET(reason, PFRES_FRAG);
drop:
	if (r != NULL && r->log)
		PFLOG_PACKET(PF_DROP, *reason, r, NULL, NULL, pd, 1, NULL);

	return (PF_DROP);
}
#endif

#ifdef INET6
int
pf_normalize_ip6(int off, u_short *reason,
    struct pf_pdesc *pd)
{
	struct pf_krule		*r;
	struct ip6_hdr		*h;
	struct ip6_frag		 frag;
	bool			 scrub_compat;

	PF_RULES_RASSERT();

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	/*
	 * Check if there are any scrub rules, matching or not.
	 * Lack of scrub rules means:
	 *  - enforced packet normalization operation just like in OpenBSD
	 * With scrub rules:
	 *  - packet normalization is performed if there is a matching scrub rule
	 * XXX: Fragment reassembly always performed for IPv6!
	 */
	scrub_compat = (r != NULL);
	while (r != NULL) {
		pf_counter_u64_add(&r->evaluations, 1);
		if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP];
		else if (r->direction && r->direction != pd->dir)
			r = r->skip[PF_SKIP_DIR];
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF];
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO];
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&pd->src, AF_INET6,
		    r->src.neg, pd->kif, M_GETFIB(pd->m)))
			r = r->skip[PF_SKIP_SRC_ADDR];
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&pd->dst, AF_INET6,
		    r->dst.neg, NULL, M_GETFIB(pd->m)))
			r = r->skip[PF_SKIP_DST_ADDR];
		else
			break;
	}

	if (scrub_compat) {
		/* With scrub rules present IPv6 normalization happens only
		 * if one of the rules has matched and it's not a "no scrub"
		 * rule */
		if (r == NULL || r->action == PF_NOSCRUB)
			return (PF_PASS);

		pf_counter_u64_critical_enter();
		pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1);
		pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len);
		pf_counter_u64_critical_exit();
		pf_rule_to_actions(r, &pd->act);
	}

	if (!pf_pull_hdr(pd->m, off, &frag, sizeof(frag), reason, AF_INET6))
		return (PF_DROP);

	/* Offset now points to data portion. */
	off += sizeof(frag);

	if (pd->virtual_proto == PF_VPROTO_FRAGMENT) {
		/* Returns PF_DROP or *m0 is NULL or completely reassembled
		 * mbuf. */
		if (pf_reassemble6(&pd->m, &frag, off, pd->extoff, reason) != PF_PASS)
			return (PF_DROP);
		if (pd->m == NULL)
			return (PF_DROP);
		h = mtod(pd->m, struct ip6_hdr *);
		pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	}

	return (PF_PASS);
}
#endif /* INET6 */

int
pf_normalize_tcp(struct pf_pdesc *pd)
{
	struct pf_krule	*r, *rm = NULL;
	struct tcphdr	*th = &pd->hdr.tcp;
	int		 rewrite = 0;
	u_short		 reason;
	u_int16_t	 flags;
	sa_family_t	 af = pd->af;
	int		 srs;

	PF_RULES_RASSERT();

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	/* Check if there are any scrub rules.  Lack of scrub rules means
	 * enforced packet normalization operation just like in OpenBSD.
	 */
	srs = (r != NULL);
	while (r != NULL) {
		pf_counter_u64_add(&r->evaluations, 1);
		if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP];
		else if (r->direction && r->direction != pd->dir)
			r = r->skip[PF_SKIP_DIR];
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF];
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO];
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, pd->kif, M_GETFIB(pd->m)))
			r = r->skip[PF_SKIP_SRC_ADDR];
		else if (r->src.port_op && !pf_match_port(r->src.port_op,
		    r->src.port[0], r->src.port[1], th->th_sport))
			r = r->skip[PF_SKIP_SRC_PORT];
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL, M_GETFIB(pd->m)))
			r = r->skip[PF_SKIP_DST_ADDR];
		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
		    r->dst.port[0], r->dst.port[1], th->th_dport))
			r = r->skip[PF_SKIP_DST_PORT];
		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
		    pf_osfp_fingerprint(pd, th),
		    r->os_fingerprint))
			r = TAILQ_NEXT(r, entries);
		else {
			rm = r;
			break;
		}
	}

	if (srs) {
		/* With scrub rules present TCP normalization happens only
		 * if one of the rules has matched and it's not a "no scrub"
		 * rule */
		if (rm == NULL || rm->action == PF_NOSCRUB)
			return (PF_PASS);

		pf_counter_u64_critical_enter();
		pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1);
		pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len);
		pf_counter_u64_critical_exit();
		pf_rule_to_actions(rm, &pd->act);
	}

	if (rm && rm->rule_flag & PFRULE_REASSEMBLE_TCP)
		pd->flags |= PFDESC_TCP_NORM;

	flags = tcp_get_flags(th);
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)
			goto tcp_drop;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
			goto tcp_drop;
	}

	/* Check for illegal header length */
	if (th->th_off < (sizeof(struct tcphdr) >> 2))
		goto tcp_drop;

	/* If flags changed, or reserved data set, then adjust */
	if (flags != tcp_get_flags(th) ||
	    (tcp_get_flags(th) & (TH_RES1|TH_RES2|TH_RES3)) != 0) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)(&th->th_ack + 1);
		flags &= ~(TH_RES1 | TH_RES2 | TH_RES3);
		tcp_set_flags(th, flags);
		nv = *(u_int16_t *)(&th->th_ack + 1);

		th->th_sum = pf_proto_cksum_fixup(pd->m, th->th_sum, ov, nv, 0);
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_sum = pf_proto_cksum_fixup(pd->m, th->th_sum, th->th_urp,
		    0, 0);
		th->th_urp = 0;
		rewrite = 1;
	}

	/* copy back packet headers if we sanitized */
	if (rewrite)
		m_copyback(pd->m, pd->off, sizeof(*th), (caddr_t)th);

	return (PF_PASS);

tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	if (rm != NULL && r->log)
		PFLOG_PACKET(PF_DROP, reason, r, NULL, NULL, pd, 1, NULL);
	return (PF_DROP);
}

int
pf_normalize_tcp_init(struct pf_pdesc *pd, struct tcphdr *th,
    struct pf_state_peer *src)
{
	u_int32_t tsval, tsecr;
	int olen;
	uint8_t opts[MAX_TCPOPTLEN], *opt;

	KASSERT((src->scrub == NULL),
	    ("pf_normalize_tcp_init: src->scrub != NULL"));

	src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT);
	if (src->scrub == NULL)
		return (1);

	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(pd->m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connection.  They must all set an enabled bit in pfss_flags.
	 */
	if ((tcp_get_flags(th) & TH_SYN) == 0)
		return (0);

	olen = (th->th_off << 2) - sizeof(*th);
	if (olen < TCPOLEN_TIMESTAMP || !pf_pull_hdr(pd->m,
	    pd->off + sizeof(*th), opts, olen, NULL, pd->af))
		return (0);

	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
	    TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP)) != NULL) {
		src->scrub->pfss_flags |= PFSS_TIMESTAMP;
		src->scrub->pfss_ts_mod = arc4random();
		/* note PFSS_PAWS not set yet */
		memcpy(&tsval, &opt[2], sizeof(u_int32_t));
		memcpy(&tsecr, &opt[6], sizeof(u_int32_t));
		src->scrub->pfss_tsval0 = ntohl(tsval);
		src->scrub->pfss_tsval = ntohl(tsval);
		src->scrub->pfss_tsecr = ntohl(tsecr);
		getmicrouptime(&src->scrub->pfss_last);

		opt += opt[1];
	}

	return (0);
}

void
pf_normalize_tcp_cleanup(struct pf_kstate *state)
{
	/* XXX Note: this also cleans up SCTP. */
	uma_zfree(V_pf_state_scrub_z, state->src.scrub);
	uma_zfree(V_pf_state_scrub_z, state->dst.scrub);

	/* Someday... flush the TCP segment reassembly descriptors. */
}

int
pf_normalize_sctp_init(struct pf_pdesc *pd, struct pf_state_peer *src,
    struct pf_state_peer *dst)
{
	src->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT);
	if (src->scrub == NULL)
		return (1);

	dst->scrub = uma_zalloc(V_pf_state_scrub_z, M_ZERO | M_NOWAIT);
	if (dst->scrub == NULL) {
		uma_zfree(V_pf_state_scrub_z, src->scrub);
		return (1);
	}

	dst->scrub->pfss_v_tag = pd->sctp_initiate_tag;

	return (0);
}

int
pf_normalize_tcp_stateful(struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_kstate *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval	uptime;
	u_int		tsval_from_last;
	uint32_t	tsval, tsecr;
	int		copyback = 0;
	int		got_ts = 0;
	int		olen;
	uint8_t		opts[MAX_TCPOPTLEN], *opt;

	KASSERT((src->scrub || dst->scrub),
	    ("%s: src->scrub && dst->scrub!", __func__));

	/*
	 * Enforce the minimum TTL seen for this connection.  Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
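	 * An attacker can choose a TTL that reaches the inspection point but
	 * expires before the end host; pinning packets to the highest TTL
	 * seen so far defeats that.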
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(pd->m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}

	olen = (th->th_off << 2) - sizeof(*th);

	if (olen >= TCPOLEN_TIMESTAMP &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, olen, NULL, pd->af)) {
		/* Modulate the timestamps.  Can be used for NAT detection, OS
		 * uptime determination or reboot detection.
		 */
		opt = opts;
		while ((opt = pf_find_tcpopt(opt, opts, olen,
		    TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP)) != NULL) {
			uint8_t *ts = opt + 2;
			uint8_t *tsr = opt + 6;

			if (got_ts) {
				/* Huh?  Multiple timestamps!? */
				if (V_pf_status.debug >= PF_DEBUG_MISC) {
					printf("pf: %s: multiple TS??", __func__);
					pf_print_state(state);
					printf("\n");
				}
				REASON_SET(reason, PFRES_TS);
				return (PF_DROP);
			}

			memcpy(&tsval, ts, sizeof(u_int32_t));
			memcpy(&tsecr, tsr, sizeof(u_int32_t));

			/* modulate TS */
			if (tsval && src->scrub &&
			    (src->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* tsval used further on */
				tsval = ntohl(tsval);
				pf_patch_32(pd,
				    ts, htonl(tsval + src->scrub->pfss_ts_mod),
				    PF_ALGNMNT(ts - opts));
				copyback = 1;
			}

			/* modulate TS reply if any (!0) */
			if (tsecr && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* tsecr used further on */
				tsecr = ntohl(tsecr) - dst->scrub->pfss_ts_mod;
				pf_patch_32(pd, tsr, htonl(tsecr),
				    PF_ALGNMNT(tsr - opts));
				copyback = 1;
			}

			got_ts = 1;
			opt += opt[1];
		}

		if (copyback) {
			/* Copyback the options, caller copies back header */
			*writeback = 1;
			m_copyback(pd->m, pd->off + sizeof(*th), olen, opts);
		}
	}

	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  That turns out to
	 * be about 24 days before it wraps.
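	 * (2^31 ms, the half of the timestamp space usable with signed
	 * comparison, is roughly 24.8 days.)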
	 * XXX Right now our lowerbound TS echo check only works for the
	 * first 12 days of a connection when the TS has exhausted half its
	 * 32bit space.
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_uptime - (state->creation / 1000) > TS_MAX_CONN)) {
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(PF_DEBUG_MISC, "src idled out of PAWS");
			pf_print_state(state);
			printf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (V_pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(PF_DEBUG_MISC, "dst idled out of PAWS");
			pf_print_state(state);
			printf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoint's timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.
		 *    The other endpoint's echoed values will not decrease.
		 *    But there may be network conditions that re-order
		 *    packets and cause our view of them to decrease.  For
		 *    now the only lowerbound we can safely determine is
		 *    that the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval	delta_ts;
		int		ts_fudge;

		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = V_pf_default_rule.timeout[PFTM_TS_DIFF];

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		delta_ts = uptime;
		timevalsub(&delta_ts, &src->scrub->pfss_last);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);

		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 * - Solaris 2.6 and 2.7 are known to send another ACK
			 *   after the FIN,FIN|ACK,ACK closing that carries
			 *   an old timestamp.
			 */

			DPFPRINTF(PF_DEBUG_MISC, "Timestamp failed %c%c%c%c",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' ');
			DPFPRINTF(PF_DEBUG_MISC, " tsval: %u tsecr: %u +ticks: "
			    "%u idle: %jus %lums",
			    tsval, tsecr, tsval_from_last,
			    (uintmax_t)delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000);
			DPFPRINTF(PF_DEBUG_MISC, " src->tsval: %u tsecr: %u",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr);
			DPFPRINTF(PF_DEBUG_MISC, " dst->tsval: %u tsecr: %u "
			    "tsval0: %u", dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0);
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(tcp_get_flags(th));
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (tcp_get_flags(th) & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (tcp_get_flags(th) & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp.
		 * Timestamps aren't really useful when:
		 *  - connection opening or closing (often not even sent),
		 *    but we must not let an attacker put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cached, it
		 * will spoof the response but not bother timestamping its
		 * packets.  So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(PF_DEBUG_MISC, "Did not receive expected "
				    "RFC1323 timestamp");
				pf_print_state(state);
				pf_print_flags(tcp_get_flags(th));
				printf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}

	/*
	 * We will note if a host sends its data packets with or without
	 * timestamps, and require all data packets to contain a timestamp
	 * if the first one does.  PAWS implicitly requires that all data
	 * packets be timestamped.  But I think there are middle-man devices
	 * that hijack TCP streams immediately after the 3whs and don't
	 * timestamp their packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (V_pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(PF_DEBUG_MISC, "Broken RFC1323 stack did "
				    "not timestamp data packet. "
				    "Disabled PAWS security.");
				pf_print_state(state);
				pf_print_flags(tcp_get_flags(th));
				printf("\n");
			}
		}
	}

	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream.... TCP segment reassembly.... */
	return (0);
}

/*
 * Clamp the TCP MSS option so it does not exceed the configured max-mss.
 */
int
pf_normalize_mss(struct pf_pdesc *pd)
{
	int olen, optsoff;
	uint8_t opts[MAX_TCPOPTLEN], *opt;

	olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
	optsoff = pd->off + sizeof(struct tcphdr);
	if (olen < TCPOLEN_MAXSEG ||
	    !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af))
		return (0);

	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
	    TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
		uint16_t mss;
		uint8_t *mssp = opt + 2;

		memcpy(&mss, mssp, sizeof(mss));
		if (ntohs(mss) > pd->act.max_mss) {
			size_t mssoffopts = mssp - opts;

			pf_patch_16(pd, &mss,
			    htons(pd->act.max_mss), PF_ALGNMNT(mssoffopts));
			m_copyback(pd->m, optsoff + mssoffopts,
			    sizeof(mss), (caddr_t)&mss);
			m_copyback(pd->m, pd->off,
			    sizeof(struct tcphdr), (caddr_t)&pd->hdr.tcp);
		}

		opt += opt[1];
	}

	return (0);
}

/*
 * Walk the SCTP chunk list, validate chunk lengths and basic INIT
 * invariants, and record the chunk types seen in pd->sctp_flags.
 */
int
pf_scan_sctp(struct pf_pdesc *pd)
{
	struct sctp_chunkhdr ch = { };
	int chunk_off = sizeof(struct sctphdr);
	int chunk_start;
	int ret;

	while (pd->off + chunk_off < pd->tot_len) {
		if (!pf_pull_hdr(pd->m, pd->off + chunk_off, &ch, sizeof(ch),
		    NULL, pd->af))
			return (PF_DROP);

		/* Length includes the header, so it must be at least 4. */
		if (ntohs(ch.chunk_length) < 4)
			return (PF_DROP);

		chunk_start = chunk_off;
		chunk_off += roundup(ntohs(ch.chunk_length), 4);

		switch (ch.chunk_type) {
		case SCTP_INITIATION:
		case SCTP_INITIATION_ACK: {
			struct sctp_init_chunk init;

			if (!pf_pull_hdr(pd->m, pd->off + chunk_start, &init,
			    sizeof(init), NULL, pd->af))
				return (PF_DROP);

			/*
			 * RFC 9260, Section 3.3.2, "The Initiate Tag is
			 * allowed to have any value except 0."
			 */
			if (init.init.initiate_tag == 0)
				return (PF_DROP);
			if (init.init.num_inbound_streams == 0)
				return (PF_DROP);
			if (init.init.num_outbound_streams == 0)
				return (PF_DROP);
			if (ntohl(init.init.a_rwnd) < SCTP_MIN_RWND)
				return (PF_DROP);

			/*
			 * RFC 9260, Section 3.1, INIT chunks MUST have a zero
			 * verification tag.
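			 * An INIT ACK instead carries the nonzero Initiate
			 * Tag learned from the peer's INIT as its verification
			 * tag, which is why only SCTP_INITIATION is checked
			 * here.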
			 */
			if (ch.chunk_type == SCTP_INITIATION &&
			    pd->hdr.sctp.v_tag != 0)
				return (PF_DROP);

			pd->sctp_initiate_tag = init.init.initiate_tag;

			if (ch.chunk_type == SCTP_INITIATION)
				pd->sctp_flags |= PFDESC_SCTP_INIT;
			else
				pd->sctp_flags |= PFDESC_SCTP_INIT_ACK;

			ret = pf_multihome_scan_init(pd->off + chunk_start,
			    ntohs(init.ch.chunk_length), pd);
			if (ret != PF_PASS)
				return (ret);

			break;
		}
		case SCTP_ABORT_ASSOCIATION:
			pd->sctp_flags |= PFDESC_SCTP_ABORT;
			break;
		case SCTP_SHUTDOWN:
		case SCTP_SHUTDOWN_ACK:
			pd->sctp_flags |= PFDESC_SCTP_SHUTDOWN;
			break;
		case SCTP_SHUTDOWN_COMPLETE:
			pd->sctp_flags |= PFDESC_SCTP_SHUTDOWN_COMPLETE;
			break;
		case SCTP_COOKIE_ECHO:
			pd->sctp_flags |= PFDESC_SCTP_COOKIE;
			break;
		case SCTP_COOKIE_ACK:
			pd->sctp_flags |= PFDESC_SCTP_COOKIE_ACK;
			break;
		case SCTP_DATA:
			pd->sctp_flags |= PFDESC_SCTP_DATA;
			break;
		case SCTP_HEARTBEAT_REQUEST:
			pd->sctp_flags |= PFDESC_SCTP_HEARTBEAT;
			break;
		case SCTP_HEARTBEAT_ACK:
			pd->sctp_flags |= PFDESC_SCTP_HEARTBEAT_ACK;
			break;
		case SCTP_ASCONF:
			pd->sctp_flags |= PFDESC_SCTP_ASCONF;

			ret = pf_multihome_scan_asconf(pd->off + chunk_start,
			    ntohs(ch.chunk_length), pd);
			if (ret != PF_PASS)
				return (ret);
			break;
		default:
			pd->sctp_flags |= PFDESC_SCTP_OTHER;
			break;
		}
	}

	/* Validate chunk lengths vs. packet length. */
	if (pd->off + chunk_off != pd->tot_len)
		return (PF_DROP);

	/*
	 * INIT, INIT_ACK or SHUTDOWN_COMPLETE chunks must always be the only
	 * chunk in a packet.
	 */
	if ((pd->sctp_flags & PFDESC_SCTP_INIT) &&
	    (pd->sctp_flags & ~PFDESC_SCTP_INIT))
		return (PF_DROP);
	if ((pd->sctp_flags & PFDESC_SCTP_INIT_ACK) &&
	    (pd->sctp_flags & ~PFDESC_SCTP_INIT_ACK))
		return (PF_DROP);
	if ((pd->sctp_flags & PFDESC_SCTP_SHUTDOWN_COMPLETE) &&
	    (pd->sctp_flags & ~PFDESC_SCTP_SHUTDOWN_COMPLETE))
		return (PF_DROP);
	if ((pd->sctp_flags & PFDESC_SCTP_ABORT) &&
	    (pd->sctp_flags & PFDESC_SCTP_DATA)) {
		/*
		 * RFC4960 3.3.7: DATA chunks MUST NOT be
		 * bundled with ABORT.
		 */
		return (PF_DROP);
	}

	return (PF_PASS);
}

/*
 * Evaluate the scrub ruleset and apply basic SCTP sanity checks.
 */
int
pf_normalize_sctp(struct pf_pdesc *pd)
{
	struct pf_krule *r, *rm = NULL;
	struct sctphdr *sh = &pd->hdr.sctp;
	u_short reason;
	sa_family_t af = pd->af;
	int srs;

	PF_RULES_RASSERT();

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	/* Check if there are any scrub rules.  Lack of scrub rules means
	 * packet normalization is enforced, just like in OpenBSD.
	 */
	srs = (r != NULL);
	while (r != NULL) {
		pf_counter_u64_add(&r->evaluations, 1);
		if (pfi_kkif_match(r->kif, pd->kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP];
		else if (r->direction && r->direction != pd->dir)
			r = r->skip[PF_SKIP_DIR];
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF];
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO];
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, pd->kif, M_GETFIB(pd->m)))
			r = r->skip[PF_SKIP_SRC_ADDR];
		else if (r->src.port_op && !pf_match_port(r->src.port_op,
		    r->src.port[0], r->src.port[1], sh->src_port))
			r = r->skip[PF_SKIP_SRC_PORT];
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL, M_GETFIB(pd->m)))
			r = r->skip[PF_SKIP_DST_ADDR];
		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
		    r->dst.port[0], r->dst.port[1], sh->dest_port))
			r = r->skip[PF_SKIP_DST_PORT];
		else {
			rm = r;
			break;
		}
	}

	if (srs) {
		/* With scrub rules present SCTP normalization happens only
		 * if one of the rules has matched and it's not a "no scrub"
		 * rule. */
		if (rm == NULL || rm->action == PF_NOSCRUB)
			return (PF_PASS);

		pf_counter_u64_critical_enter();
		pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1);
		pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT],
		    pd->tot_len);
		pf_counter_u64_critical_exit();
	}

	/* Verify we're a multiple of 4 bytes long */
	if ((pd->tot_len - pd->off - sizeof(struct sctphdr)) % 4)
		goto sctp_drop;

	/* INIT chunk needs to be the only chunk */
	if (pd->sctp_flags & PFDESC_SCTP_INIT)
		if (pd->sctp_flags & ~PFDESC_SCTP_INIT)
			goto sctp_drop;

	return (PF_PASS);

sctp_drop:
	REASON_SET(&reason, PFRES_NORM);
	if (rm != NULL && r->log)
		PFLOG_PACKET(PF_DROP, reason, r, NULL, NULL, pd,
		    1, NULL);

	return (PF_DROP);
}

#if defined(INET) || defined(INET6)
void
pf_scrub(struct pf_pdesc *pd)
{
	struct ip *h = mtod(pd->m, struct ip *);
#ifdef INET6
	struct ip6_hdr *h6 = mtod(pd->m, struct ip6_hdr *);
#endif /* INET6 */

	/* Clear IP_DF if no-df was requested */
	if (pd->af == AF_INET && pd->act.flags & PFSTATE_NODF &&
	    h->ip_off & htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;

		h->ip_off &= htons(~IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (pd->af == AF_INET && pd->act.min_ttl &&
	    h->ip_ttl < pd->act.min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = pd->act.min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
#ifdef INET6
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (pd->af == AF_INET6 && pd->act.min_ttl &&
	    h6->ip6_hlim < pd->act.min_ttl)
		h6->ip6_hlim = pd->act.min_ttl;
#endif /* INET6 */
	/* Enforce tos */
	if (pd->act.flags & PFSTATE_SETTOS) {
		switch (pd->af) {
		case AF_INET: {
			u_int16_t ov, nv;

			ov = *(u_int16_t *)h;
			h->ip_tos = pd->act.set_tos | (h->ip_tos & IPTOS_ECN_MASK);
			nv = *(u_int16_t *)h;

			h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
			break;
		}
#ifdef INET6
		case AF_INET6:
			h6->ip6_flow &= IPV6_FLOWLABEL_MASK | IPV6_VERSION_MASK;
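			/*
			 * In ip6_flow the 8-bit traffic class sits directly
			 * above the 20-bit flow label, so shift the new TOS
			 * (with the original ECN bits preserved) into bits
			 * 20-27 before folding it back in.
			 */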
			h6->ip6_flow |= htonl((pd->act.set_tos | IPV6_ECN(h6)) << 20);
			break;
#endif /* INET6 */
		}
	}

	/* random-id, but not for fragments */
#ifdef INET
	if (pd->af == AF_INET &&
	    pd->act.flags & PFSTATE_RANDOMID && !(h->ip_off & ~htons(IP_DF))) {
		uint16_t ip_id = h->ip_id;

		ip_fillid(h, V_ip_random_id);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
#endif /* INET */
}
#endif /* INET || INET6 */