1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * - Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * - Redistributions in binary form must reproduce the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer in the documentation and/or other materials provided 18 * with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Effort sponsored in part by the Defense Advanced Research Projects 34 * Agency (DARPA) and Air Force Research Laboratory, Air Force 35 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
36 * 37 * $OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $ 38 */ 39 40 #include <sys/cdefs.h> 41 #include "opt_bpf.h" 42 #include "opt_inet.h" 43 #include "opt_inet6.h" 44 #include "opt_pf.h" 45 #include "opt_sctp.h" 46 47 #include <sys/param.h> 48 #include <sys/bus.h> 49 #include <sys/endian.h> 50 #include <sys/gsb_crc32.h> 51 #include <sys/hash.h> 52 #include <sys/interrupt.h> 53 #include <sys/kernel.h> 54 #include <sys/kthread.h> 55 #include <sys/limits.h> 56 #include <sys/mbuf.h> 57 #include <sys/random.h> 58 #include <sys/refcount.h> 59 #include <sys/sdt.h> 60 #include <sys/socket.h> 61 #include <sys/sysctl.h> 62 #include <sys/taskqueue.h> 63 #include <sys/ucred.h> 64 65 #include <crypto/sha2/sha512.h> 66 67 #include <net/if.h> 68 #include <net/if_var.h> 69 #include <net/if_private.h> 70 #include <net/if_types.h> 71 #include <net/if_vlan_var.h> 72 #include <net/route.h> 73 #include <net/route/nhop.h> 74 #include <net/vnet.h> 75 76 #include <net/pfil.h> 77 #include <net/pfvar.h> 78 #include <net/if_pflog.h> 79 #include <net/if_pfsync.h> 80 81 #include <netinet/in_pcb.h> 82 #include <netinet/in_var.h> 83 #include <netinet/in_fib.h> 84 #include <netinet/ip.h> 85 #include <netinet/ip_fw.h> 86 #include <netinet/ip_icmp.h> 87 #include <netinet/icmp_var.h> 88 #include <netinet/ip_var.h> 89 #include <netinet/tcp.h> 90 #include <netinet/tcp_fsm.h> 91 #include <netinet/tcp_seq.h> 92 #include <netinet/tcp_timer.h> 93 #include <netinet/tcp_var.h> 94 #include <netinet/udp.h> 95 #include <netinet/udp_var.h> 96 97 /* dummynet */ 98 #include <netinet/ip_dummynet.h> 99 #include <netinet/ip_fw.h> 100 #include <netpfil/ipfw/dn_heap.h> 101 #include <netpfil/ipfw/ip_fw_private.h> 102 #include <netpfil/ipfw/ip_dn_private.h> 103 104 #ifdef INET6 105 #include <netinet/ip6.h> 106 #include <netinet/icmp6.h> 107 #include <netinet6/nd6.h> 108 #include <netinet6/ip6_var.h> 109 #include <netinet6/in6_pcb.h> 110 #include <netinet6/in6_fib.h> 111 #include <netinet6/scope6_var.h> 112 
#endif /* INET6 */ 113 114 #include <netinet/sctp_header.h> 115 #include <netinet/sctp_crc32.h> 116 117 #include <netipsec/ah.h> 118 119 #include <machine/in_cksum.h> 120 #include <security/mac/mac_framework.h> 121 122 SDT_PROVIDER_DEFINE(pf); 123 SDT_PROBE_DEFINE2(pf, , test, reason_set, "int", "int"); 124 SDT_PROBE_DEFINE4(pf, ip, test, done, "int", "int", "struct pf_krule *", 125 "struct pf_kstate *"); 126 SDT_PROBE_DEFINE5(pf, ip, state, lookup, "struct pfi_kkif *", 127 "struct pf_state_key_cmp *", "int", "struct pf_pdesc *", 128 "struct pf_kstate *"); 129 SDT_PROBE_DEFINE2(pf, ip, , bound_iface, "struct pf_kstate *", 130 "struct pfi_kkif *"); 131 SDT_PROBE_DEFINE4(pf, ip, route_to, entry, "struct mbuf *", 132 "struct pf_pdesc *", "struct pf_kstate *", "struct ifnet *"); 133 SDT_PROBE_DEFINE1(pf, ip, route_to, drop, "int"); 134 SDT_PROBE_DEFINE2(pf, ip, route_to, output, "struct ifnet *", "int"); 135 SDT_PROBE_DEFINE4(pf, ip6, route_to, entry, "struct mbuf *", 136 "struct pf_pdesc *", "struct pf_kstate *", "struct ifnet *"); 137 SDT_PROBE_DEFINE1(pf, ip6, route_to, drop, "int"); 138 SDT_PROBE_DEFINE2(pf, ip6, route_to, output, "struct ifnet *", "int"); 139 SDT_PROBE_DEFINE4(pf, sctp, multihome, test, "struct pfi_kkif *", 140 "struct pf_krule *", "struct mbuf *", "int"); 141 SDT_PROBE_DEFINE2(pf, sctp, multihome, add, "uint32_t", 142 "struct pf_sctp_source *"); 143 SDT_PROBE_DEFINE3(pf, sctp, multihome, remove, "uint32_t", 144 "struct pf_kstate *", "struct pf_sctp_source *"); 145 SDT_PROBE_DEFINE4(pf, sctp, multihome_scan, entry, "int", 146 "int", "struct pf_pdesc *", "int"); 147 SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, param, "uint16_t", "uint16_t"); 148 SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, ipv4, "struct in_addr *", 149 "int"); 150 SDT_PROBE_DEFINE2(pf, sctp, multihome_scan, ipv6, "struct in_addr6 *", 151 "int"); 152 153 SDT_PROBE_DEFINE3(pf, eth, test_rule, entry, "int", "struct ifnet *", 154 "struct mbuf *"); 155 SDT_PROBE_DEFINE2(pf, eth, 
test_rule, test, "int", "struct pf_keth_rule *"); 156 SDT_PROBE_DEFINE3(pf, eth, test_rule, mismatch, 157 "int", "struct pf_keth_rule *", "char *"); 158 SDT_PROBE_DEFINE2(pf, eth, test_rule, match, "int", "struct pf_keth_rule *"); 159 SDT_PROBE_DEFINE2(pf, eth, test_rule, final_match, 160 "int", "struct pf_keth_rule *"); 161 SDT_PROBE_DEFINE2(pf, purge, state, rowcount, "int", "size_t"); 162 SDT_PROBE_DEFINE2(pf, , log, log, "int", "const char *"); 163 164 /* 165 * Global variables 166 */ 167 168 /* state tables */ 169 VNET_DEFINE(struct pf_altqqueue, pf_altqs[4]); 170 VNET_DEFINE(struct pf_kpalist, pf_pabuf[3]); 171 VNET_DEFINE(struct pf_altqqueue *, pf_altqs_active); 172 VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_active); 173 VNET_DEFINE(struct pf_altqqueue *, pf_altqs_inactive); 174 VNET_DEFINE(struct pf_altqqueue *, pf_altq_ifs_inactive); 175 VNET_DEFINE(struct pf_kstatus, pf_status); 176 177 VNET_DEFINE(u_int32_t, ticket_altqs_active); 178 VNET_DEFINE(u_int32_t, ticket_altqs_inactive); 179 VNET_DEFINE(int, altqs_inactive_open); 180 VNET_DEFINE(u_int32_t, ticket_pabuf); 181 182 static const int PF_HDR_LIMIT = 20; /* arbitrary limit */ 183 184 VNET_DEFINE(SHA512_CTX, pf_tcp_secret_ctx); 185 #define V_pf_tcp_secret_ctx VNET(pf_tcp_secret_ctx) 186 VNET_DEFINE(u_char, pf_tcp_secret[16]); 187 #define V_pf_tcp_secret VNET(pf_tcp_secret) 188 VNET_DEFINE(int, pf_tcp_secret_init); 189 #define V_pf_tcp_secret_init VNET(pf_tcp_secret_init) 190 VNET_DEFINE(int, pf_tcp_iss_off); 191 #define V_pf_tcp_iss_off VNET(pf_tcp_iss_off) 192 VNET_DECLARE(int, pf_vnet_active); 193 #define V_pf_vnet_active VNET(pf_vnet_active) 194 195 VNET_DEFINE_STATIC(uint32_t, pf_purge_idx); 196 #define V_pf_purge_idx VNET(pf_purge_idx) 197 198 #ifdef PF_WANT_32_TO_64_COUNTER 199 VNET_DEFINE_STATIC(uint32_t, pf_counter_periodic_iter); 200 #define V_pf_counter_periodic_iter VNET(pf_counter_periodic_iter) 201 202 VNET_DEFINE(struct allrulelist_head, pf_allrulelist); 203 VNET_DEFINE(size_t, 
pf_allrulecount); 204 VNET_DEFINE(struct pf_krule *, pf_rulemarker); 205 #endif 206 207 #define PF_SCTP_MAX_ENDPOINTS 8 208 209 struct pf_sctp_endpoint; 210 RB_HEAD(pf_sctp_endpoints, pf_sctp_endpoint); 211 struct pf_sctp_source { 212 sa_family_t af; 213 struct pf_addr addr; 214 TAILQ_ENTRY(pf_sctp_source) entry; 215 }; 216 TAILQ_HEAD(pf_sctp_sources, pf_sctp_source); 217 struct pf_sctp_endpoint 218 { 219 uint32_t v_tag; 220 struct pf_sctp_sources sources; 221 RB_ENTRY(pf_sctp_endpoint) entry; 222 }; 223 static int 224 pf_sctp_endpoint_compare(struct pf_sctp_endpoint *a, struct pf_sctp_endpoint *b) 225 { 226 return (a->v_tag - b->v_tag); 227 } 228 RB_PROTOTYPE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare); 229 RB_GENERATE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare); 230 VNET_DEFINE_STATIC(struct pf_sctp_endpoints, pf_sctp_endpoints); 231 #define V_pf_sctp_endpoints VNET(pf_sctp_endpoints) 232 static struct mtx_padalign pf_sctp_endpoints_mtx; 233 MTX_SYSINIT(pf_sctp_endpoints_mtx, &pf_sctp_endpoints_mtx, "SCTP endpoints", MTX_DEF); 234 #define PF_SCTP_ENDPOINTS_LOCK() mtx_lock(&pf_sctp_endpoints_mtx) 235 #define PF_SCTP_ENDPOINTS_UNLOCK() mtx_unlock(&pf_sctp_endpoints_mtx) 236 237 /* 238 * Queue for pf_intr() sends. 
 */
static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
/* One queued transmission; entries are linked through pfse_next. */
struct pf_send_entry {
	STAILQ_ENTRY(pf_send_entry)	pfse_next;
	struct mbuf			*pfse_m;	/* packet attached to this entry */
	enum {
		PFSE_IP,
		PFSE_IP6,
		PFSE_ICMP,
		PFSE_ICMP6,
	}				pfse_type;
	/*
	 * NOTE(review): presumably only meaningful for PFSE_ICMP/PFSE_ICMP6
	 * entries; the consumer is outside this view, confirm there.
	 */
	struct {
		int		type;
		int		code;
		int		mtu;
	} icmpopts;
};

STAILQ_HEAD(pf_send_head, pf_send_entry);
VNET_DEFINE_STATIC(struct pf_send_head, pf_sendqueue);
#define	V_pf_sendqueue	VNET(pf_sendqueue)

/* Mutex taken by PF_SENDQ_LOCK()/PF_SENDQ_UNLOCK(). */
static struct mtx_padalign pf_sendqueue_mtx;
MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF);
#define	PF_SENDQ_LOCK()		mtx_lock(&pf_sendqueue_mtx)
#define	PF_SENDQ_UNLOCK()	mtx_unlock(&pf_sendqueue_mtx)

/*
 * Queue for pf_overload_task() tasks.
 */
struct pf_overload_entry {
	SLIST_ENTRY(pf_overload_entry)	next;
	struct pf_addr  		addr;	/* copied from the source node in pf_src_connlimit() */
	sa_family_t			af;	/* family of the state's wire key */
	uint8_t				dir;	/* direction of the offending state */
	struct pf_krule  		*rule;	/* rule supplying overload_tbl and flush */
};

SLIST_HEAD(pf_overload_head, pf_overload_entry);
VNET_DEFINE_STATIC(struct pf_overload_head, pf_overloadqueue);
#define V_pf_overloadqueue	VNET(pf_overloadqueue)
VNET_DEFINE_STATIC(struct task, pf_overloadtask);
#define	V_pf_overloadtask	VNET(pf_overloadtask)

/* Mutex taken by PF_OVERLOADQ_LOCK()/PF_OVERLOADQ_UNLOCK(). */
static struct mtx_padalign pf_overloadqueue_mtx;
MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx,
    "pf overload/flush queue", MTX_DEF);
#define	PF_OVERLOADQ_LOCK()	mtx_lock(&pf_overloadqueue_mtx)
#define	PF_OVERLOADQ_UNLOCK()	mtx_unlock(&pf_overloadqueue_mtx)

VNET_DEFINE(struct pf_krulequeue, pf_unlinked_rules);
struct mtx_padalign pf_unlnkdrules_mtx;
MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules",
    MTX_DEF);

struct sx pf_config_lock;
SX_SYSINIT(pf_config_lock, &pf_config_lock, "pf config");

struct mtx_padalign pf_table_stats_lock;
MTX_SYSINIT(pf_table_stats_lock, &pf_table_stats_lock, "pf table stats",
    MTX_DEF);

VNET_DEFINE_STATIC(uma_zone_t, pf_sources_z); 302 #define V_pf_sources_z VNET(pf_sources_z) 303 uma_zone_t pf_mtag_z; 304 VNET_DEFINE(uma_zone_t, pf_state_z); 305 VNET_DEFINE(uma_zone_t, pf_state_key_z); 306 VNET_DEFINE(uma_zone_t, pf_udp_mapping_z); 307 308 VNET_DEFINE(struct unrhdr64, pf_stateid); 309 310 static void pf_src_tree_remove_state(struct pf_kstate *); 311 static int pf_check_threshold(struct pf_kthreshold *); 312 313 static void pf_change_ap(struct pf_pdesc *, struct pf_addr *, u_int16_t *, 314 struct pf_addr *, u_int16_t); 315 static int pf_modulate_sack(struct pf_pdesc *, 316 struct tcphdr *, struct pf_state_peer *); 317 int pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *, 318 u_int16_t *, u_int16_t *); 319 static void pf_change_icmp(struct pf_addr *, u_int16_t *, 320 struct pf_addr *, struct pf_addr *, u_int16_t, 321 u_int16_t *, u_int16_t *, u_int16_t *, 322 u_int16_t *, u_int8_t, sa_family_t); 323 int pf_change_icmp_af(struct mbuf *, int, 324 struct pf_pdesc *, struct pf_pdesc *, 325 struct pf_addr *, struct pf_addr *, sa_family_t, 326 sa_family_t); 327 int pf_translate_icmp_af(int, void *); 328 static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, 329 int, sa_family_t, struct pf_krule *, int); 330 static void pf_detach_state(struct pf_kstate *); 331 static int pf_state_key_attach(struct pf_state_key *, 332 struct pf_state_key *, struct pf_kstate *); 333 static void pf_state_key_detach(struct pf_kstate *, int); 334 static int pf_state_key_ctor(void *, int, void *, int); 335 static u_int32_t pf_tcp_iss(struct pf_pdesc *); 336 static __inline void pf_dummynet_flag_remove(struct mbuf *m, 337 struct pf_mtag *pf_mtag); 338 static int pf_dummynet(struct pf_pdesc *, struct pf_kstate *, 339 struct pf_krule *, struct mbuf **); 340 static int pf_dummynet_route(struct pf_pdesc *, 341 struct pf_kstate *, struct pf_krule *, 342 struct ifnet *, const struct sockaddr *, struct mbuf **); 343 static int pf_test_eth_rule(int, struct pfi_kkif *, 344 struct 
mbuf **); 345 static int pf_test_rule(struct pf_krule **, struct pf_kstate **, 346 struct pf_pdesc *, struct pf_krule **, 347 struct pf_kruleset **, u_short *, struct inpcb *); 348 static int pf_create_state(struct pf_krule *, 349 struct pf_test_ctx *, 350 struct pf_kstate **, u_int16_t, u_int16_t); 351 static int pf_state_key_addr_setup(struct pf_pdesc *, 352 struct pf_state_key_cmp *, int); 353 static int pf_tcp_track_full(struct pf_kstate *, 354 struct pf_pdesc *, u_short *, int *, 355 struct pf_state_peer *, struct pf_state_peer *, 356 u_int8_t, u_int8_t); 357 static int pf_tcp_track_sloppy(struct pf_kstate *, 358 struct pf_pdesc *, u_short *, 359 struct pf_state_peer *, struct pf_state_peer *, 360 u_int8_t, u_int8_t); 361 static int pf_test_state(struct pf_kstate **, struct pf_pdesc *, 362 u_short *); 363 int pf_icmp_state_lookup(struct pf_state_key_cmp *, 364 struct pf_pdesc *, struct pf_kstate **, 365 u_int16_t, u_int16_t, int, int *, int, int); 366 static int pf_test_state_icmp(struct pf_kstate **, 367 struct pf_pdesc *, u_short *); 368 static int pf_sctp_track(struct pf_kstate *, struct pf_pdesc *, 369 u_short *); 370 static void pf_sctp_multihome_detach_addr(const struct pf_kstate *); 371 static void pf_sctp_multihome_delayed(struct pf_pdesc *, 372 struct pfi_kkif *, struct pf_kstate *, int); 373 static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, 374 int, u_int16_t); 375 static int pf_check_proto_cksum(struct mbuf *, int, int, 376 u_int8_t, sa_family_t); 377 static int pf_walk_option(struct pf_pdesc *, struct ip *, 378 int, int, u_short *); 379 static int pf_walk_header(struct pf_pdesc *, struct ip *, u_short *); 380 #ifdef INET6 381 static int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *, 382 int, int, u_short *); 383 static int pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *, 384 u_short *); 385 #endif 386 static void pf_print_state_parts(struct pf_kstate *, 387 struct pf_state_key *, struct pf_state_key *); 388 static int 
pf_patch_8(struct pf_pdesc *, u_int8_t *, u_int8_t, 389 bool); 390 static int pf_find_state(struct pf_pdesc *, 391 const struct pf_state_key_cmp *, struct pf_kstate **); 392 static bool pf_src_connlimit(struct pf_kstate *); 393 static int pf_match_rcvif(struct mbuf *, struct pf_krule *); 394 static void pf_counters_inc(int, struct pf_pdesc *, 395 struct pf_kstate *, struct pf_krule *, 396 struct pf_krule *); 397 static void pf_log_matches(struct pf_pdesc *, struct pf_krule *, 398 struct pf_krule *, struct pf_kruleset *, 399 struct pf_krule_slist *); 400 static void pf_overload_task(void *v, int pending); 401 static u_short pf_insert_src_node(struct pf_ksrc_node *[PF_SN_MAX], 402 struct pf_srchash *[PF_SN_MAX], struct pf_krule *, 403 struct pf_addr *, sa_family_t, struct pf_addr *, 404 struct pfi_kkif *, pf_sn_types_t); 405 static u_int pf_purge_expired_states(u_int, int); 406 static void pf_purge_unlinked_rules(void); 407 static int pf_mtag_uminit(void *, int, int); 408 static void pf_mtag_free(struct m_tag *); 409 static void pf_packet_rework_nat(struct pf_pdesc *, int, 410 struct pf_state_key *); 411 #ifdef INET 412 static void pf_route(struct pf_krule *, 413 struct ifnet *, struct pf_kstate *, 414 struct pf_pdesc *, struct inpcb *); 415 #endif /* INET */ 416 #ifdef INET6 417 static void pf_change_a6(struct pf_addr *, u_int16_t *, 418 struct pf_addr *, u_int8_t); 419 static void pf_route6(struct pf_krule *, 420 struct ifnet *, struct pf_kstate *, 421 struct pf_pdesc *, struct inpcb *); 422 #endif /* INET6 */ 423 static __inline void pf_set_protostate(struct pf_kstate *, int, u_int8_t); 424 425 int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); 426 427 extern int pf_end_threads; 428 extern struct proc *pf_purge_proc; 429 430 VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); 431 432 #define PACKET_UNDO_NAT(_pd, _off, _s) \ 433 do { \ 434 struct pf_state_key *nk; \ 435 if ((pd->dir) == PF_OUT) \ 436 nk = (_s)->key[PF_SK_STACK]; \ 437 else \ 438 nk = 
(_s)->key[PF_SK_WIRE]; \ 439 pf_packet_rework_nat(_pd, _off, nk); \ 440 } while (0) 441 442 #define PACKET_LOOPED(pd) ((pd)->pf_mtag && \ 443 (pd)->pf_mtag->flags & PF_MTAG_FLAG_PACKET_LOOPED) 444 445 static struct pfi_kkif * 446 BOUND_IFACE(struct pf_kstate *st, struct pf_pdesc *pd) 447 { 448 struct pfi_kkif *k = pd->kif; 449 450 SDT_PROBE2(pf, ip, , bound_iface, st, k); 451 452 /* Floating unless otherwise specified. */ 453 if (! (st->rule->rule_flag & PFRULE_IFBOUND)) 454 return (V_pfi_all); 455 456 /* 457 * Initially set to all, because we don't know what interface we'll be 458 * sending this out when we create the state. 459 */ 460 if (st->rule->rt == PF_REPLYTO || (pd->af != pd->naf && st->direction == PF_IN)) 461 return (V_pfi_all); 462 463 /* 464 * If this state is created based on another state (e.g. SCTP 465 * multihome) always set it floating initially. We can't know for sure 466 * what interface the actual traffic for this state will come in on. 467 */ 468 if (pd->related_rule) 469 return (V_pfi_all); 470 471 /* Don't overrule the interface for states created on incoming packets. */ 472 if (st->direction == PF_IN) 473 return (k); 474 475 /* No route-to, so don't overrule. */ 476 if (st->act.rt != PF_ROUTETO) 477 return (k); 478 479 /* Bind to the route-to interface. 
*/ 480 return (st->act.rt_kif); 481 } 482 483 #define STATE_INC_COUNTERS(s) \ 484 do { \ 485 struct pf_krule_item *mrm; \ 486 counter_u64_add(s->rule->states_cur, 1); \ 487 counter_u64_add(s->rule->states_tot, 1); \ 488 if (s->anchor != NULL) { \ 489 counter_u64_add(s->anchor->states_cur, 1); \ 490 counter_u64_add(s->anchor->states_tot, 1); \ 491 } \ 492 if (s->nat_rule != NULL) { \ 493 counter_u64_add(s->nat_rule->states_cur, 1);\ 494 counter_u64_add(s->nat_rule->states_tot, 1);\ 495 } \ 496 SLIST_FOREACH(mrm, &s->match_rules, entry) { \ 497 counter_u64_add(mrm->r->states_cur, 1); \ 498 counter_u64_add(mrm->r->states_tot, 1); \ 499 } \ 500 } while (0) 501 502 #define STATE_DEC_COUNTERS(s) \ 503 do { \ 504 struct pf_krule_item *mrm; \ 505 if (s->nat_rule != NULL) \ 506 counter_u64_add(s->nat_rule->states_cur, -1);\ 507 if (s->anchor != NULL) \ 508 counter_u64_add(s->anchor->states_cur, -1); \ 509 counter_u64_add(s->rule->states_cur, -1); \ 510 SLIST_FOREACH(mrm, &s->match_rules, entry) \ 511 counter_u64_add(mrm->r->states_cur, -1); \ 512 } while (0) 513 514 MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures"); 515 MALLOC_DEFINE(M_PF_RULE_ITEM, "pf_krule_item", "pf(4) rule items"); 516 VNET_DEFINE(struct pf_keyhash *, pf_keyhash); 517 VNET_DEFINE(struct pf_idhash *, pf_idhash); 518 VNET_DEFINE(struct pf_srchash *, pf_srchash); 519 VNET_DEFINE(struct pf_udpendpointhash *, pf_udpendpointhash); 520 VNET_DEFINE(struct pf_udpendpointmapping *, pf_udpendpointmapping); 521 522 SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 523 "pf(4)"); 524 525 VNET_DEFINE(u_long, pf_hashmask); 526 VNET_DEFINE(u_long, pf_srchashmask); 527 VNET_DEFINE(u_long, pf_udpendpointhashmask); 528 VNET_DEFINE_STATIC(u_long, pf_hashsize); 529 #define V_pf_hashsize VNET(pf_hashsize) 530 VNET_DEFINE_STATIC(u_long, pf_srchashsize); 531 #define V_pf_srchashsize VNET(pf_srchashsize) 532 VNET_DEFINE_STATIC(u_long, pf_udpendpointhashsize); 533 #define V_pf_udpendpointhashsize 
VNET(pf_udpendpointhashsize) 534 u_long pf_ioctl_maxcount = 65535; 535 536 SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN, 537 &VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable"); 538 SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN, 539 &VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable"); 540 SYSCTL_ULONG(_net_pf, OID_AUTO, udpendpoint_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN, 541 &VNET_NAME(pf_udpendpointhashsize), 0, "Size of pf(4) endpoint hashtable"); 542 SYSCTL_ULONG(_net_pf, OID_AUTO, request_maxcount, CTLFLAG_RWTUN, 543 &pf_ioctl_maxcount, 0, "Maximum number of tables, addresses, ... in a single ioctl() call"); 544 545 VNET_DEFINE(void *, pf_swi_cookie); 546 VNET_DEFINE(struct intr_event *, pf_swi_ie); 547 548 VNET_DEFINE(uint32_t, pf_hashseed); 549 #define V_pf_hashseed VNET(pf_hashseed) 550 551 static void 552 pf_sctp_checksum(struct mbuf *m, int off) 553 { 554 uint32_t sum = 0; 555 556 /* Zero out the checksum, to enable recalculation. 
*/ 557 m_copyback(m, off + offsetof(struct sctphdr, checksum), 558 sizeof(sum), (caddr_t)&sum); 559 560 sum = sctp_calculate_cksum(m, off); 561 562 m_copyback(m, off + offsetof(struct sctphdr, checksum), 563 sizeof(sum), (caddr_t)&sum); 564 } 565 566 int 567 pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af) 568 { 569 570 switch (af) { 571 #ifdef INET 572 case AF_INET: 573 if (a->addr32[0] > b->addr32[0]) 574 return (1); 575 if (a->addr32[0] < b->addr32[0]) 576 return (-1); 577 break; 578 #endif /* INET */ 579 #ifdef INET6 580 case AF_INET6: 581 if (a->addr32[3] > b->addr32[3]) 582 return (1); 583 if (a->addr32[3] < b->addr32[3]) 584 return (-1); 585 if (a->addr32[2] > b->addr32[2]) 586 return (1); 587 if (a->addr32[2] < b->addr32[2]) 588 return (-1); 589 if (a->addr32[1] > b->addr32[1]) 590 return (1); 591 if (a->addr32[1] < b->addr32[1]) 592 return (-1); 593 if (a->addr32[0] > b->addr32[0]) 594 return (1); 595 if (a->addr32[0] < b->addr32[0]) 596 return (-1); 597 break; 598 #endif /* INET6 */ 599 default: 600 unhandled_af(af); 601 } 602 return (0); 603 } 604 605 static bool 606 pf_is_loopback(sa_family_t af, struct pf_addr *addr) 607 { 608 switch (af) { 609 #ifdef INET 610 case AF_INET: 611 return IN_LOOPBACK(ntohl(addr->v4.s_addr)); 612 #endif /* INET */ 613 case AF_INET6: 614 return IN6_IS_ADDR_LOOPBACK(&addr->v6); 615 default: 616 unhandled_af(af); 617 } 618 } 619 620 static void 621 pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk) 622 { 623 624 switch (pd->proto) { 625 case IPPROTO_TCP: { 626 struct tcphdr *th = &pd->hdr.tcp; 627 628 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) 629 pf_change_ap(pd, pd->src, &th->th_sport, 630 &nk->addr[pd->sidx], nk->port[pd->sidx]); 631 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) 632 pf_change_ap(pd, pd->dst, &th->th_dport, 633 &nk->addr[pd->didx], nk->port[pd->didx]); 634 m_copyback(pd->m, off, sizeof(*th), (caddr_t)th); 635 break; 636 } 637 case IPPROTO_UDP: { 638 
struct udphdr *uh = &pd->hdr.udp; 639 640 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) 641 pf_change_ap(pd, pd->src, &uh->uh_sport, 642 &nk->addr[pd->sidx], nk->port[pd->sidx]); 643 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) 644 pf_change_ap(pd, pd->dst, &uh->uh_dport, 645 &nk->addr[pd->didx], nk->port[pd->didx]); 646 m_copyback(pd->m, off, sizeof(*uh), (caddr_t)uh); 647 break; 648 } 649 case IPPROTO_SCTP: { 650 struct sctphdr *sh = &pd->hdr.sctp; 651 652 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) { 653 pf_change_ap(pd, pd->src, &sh->src_port, 654 &nk->addr[pd->sidx], nk->port[pd->sidx]); 655 } 656 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) { 657 pf_change_ap(pd, pd->dst, &sh->dest_port, 658 &nk->addr[pd->didx], nk->port[pd->didx]); 659 } 660 661 break; 662 } 663 case IPPROTO_ICMP: { 664 struct icmp *ih = &pd->hdr.icmp; 665 666 if (nk->port[pd->sidx] != ih->icmp_id) { 667 pd->hdr.icmp.icmp_cksum = pf_cksum_fixup( 668 ih->icmp_cksum, ih->icmp_id, 669 nk->port[pd->sidx], 0); 670 ih->icmp_id = nk->port[pd->sidx]; 671 pd->sport = &ih->icmp_id; 672 673 m_copyback(pd->m, off, ICMP_MINLEN, (caddr_t)ih); 674 } 675 /* FALLTHROUGH */ 676 } 677 default: 678 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) { 679 switch (pd->af) { 680 case AF_INET: 681 pf_change_a(&pd->src->v4.s_addr, 682 pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 683 0); 684 break; 685 case AF_INET6: 686 pf_addrcpy(pd->src, &nk->addr[pd->sidx], 687 pd->af); 688 break; 689 default: 690 unhandled_af(pd->af); 691 } 692 } 693 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) { 694 switch (pd->af) { 695 case AF_INET: 696 pf_change_a(&pd->dst->v4.s_addr, 697 pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 698 0); 699 break; 700 case AF_INET6: 701 pf_addrcpy(pd->dst, &nk->addr[pd->didx], 702 pd->af); 703 break; 704 default: 705 unhandled_af(pd->af); 706 } 707 } 708 break; 709 } 710 } 711 712 static __inline uint32_t 713 pf_hashkey(const struct pf_state_key *sk) 714 { 715 uint32_t h; 716 717 
h = murmur3_32_hash32((const uint32_t *)sk, 718 sizeof(struct pf_state_key_cmp)/sizeof(uint32_t), 719 V_pf_hashseed); 720 721 return (h & V_pf_hashmask); 722 } 723 724 __inline uint32_t 725 pf_hashsrc(struct pf_addr *addr, sa_family_t af) 726 { 727 uint32_t h; 728 729 switch (af) { 730 case AF_INET: 731 h = murmur3_32_hash32((uint32_t *)&addr->v4, 732 sizeof(addr->v4)/sizeof(uint32_t), V_pf_hashseed); 733 break; 734 case AF_INET6: 735 h = murmur3_32_hash32((uint32_t *)&addr->v6, 736 sizeof(addr->v6)/sizeof(uint32_t), V_pf_hashseed); 737 break; 738 default: 739 unhandled_af(af); 740 } 741 742 return (h & V_pf_srchashmask); 743 } 744 745 static inline uint32_t 746 pf_hashudpendpoint(struct pf_udp_endpoint *endpoint) 747 { 748 uint32_t h; 749 750 h = murmur3_32_hash32((uint32_t *)endpoint, 751 sizeof(struct pf_udp_endpoint_cmp)/sizeof(uint32_t), 752 V_pf_hashseed); 753 return (h & V_pf_udpendpointhashmask); 754 } 755 756 #ifdef ALTQ 757 static int 758 pf_state_hash(struct pf_kstate *s) 759 { 760 u_int32_t hv = (intptr_t)s / sizeof(*s); 761 762 hv ^= crc32(&s->src, sizeof(s->src)); 763 hv ^= crc32(&s->dst, sizeof(s->dst)); 764 if (hv == 0) 765 hv = 1; 766 return (hv); 767 } 768 #endif /* ALTQ */ 769 770 static __inline void 771 pf_set_protostate(struct pf_kstate *s, int which, u_int8_t newstate) 772 { 773 if (which == PF_PEER_DST || which == PF_PEER_BOTH) 774 s->dst.state = newstate; 775 if (which == PF_PEER_DST) 776 return; 777 if (s->src.state == newstate) 778 return; 779 if (s->creatorid == V_pf_status.hostid && 780 s->key[PF_SK_STACK] != NULL && 781 s->key[PF_SK_STACK]->proto == IPPROTO_TCP && 782 !(TCPS_HAVEESTABLISHED(s->src.state) || 783 s->src.state == TCPS_CLOSED) && 784 (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED)) 785 atomic_add_32(&V_pf_status.states_halfopen, -1); 786 787 s->src.state = newstate; 788 } 789 790 bool 791 pf_init_threshold(struct pf_kthreshold *threshold, 792 u_int32_t limit, u_int32_t seconds) 793 { 794 threshold->limit = 
limit; 795 threshold->seconds = seconds; 796 threshold->cr = counter_rate_alloc(M_NOWAIT, seconds); 797 798 return (threshold->cr != NULL); 799 } 800 801 static int 802 pf_check_threshold(struct pf_kthreshold *threshold) 803 { 804 return (counter_ratecheck(threshold->cr, threshold->limit) < 0); 805 } 806 807 static bool 808 pf_src_connlimit(struct pf_kstate *state) 809 { 810 struct pf_overload_entry *pfoe; 811 struct pf_ksrc_node *src_node = state->sns[PF_SN_LIMIT]; 812 bool limited = false; 813 814 PF_STATE_LOCK_ASSERT(state); 815 PF_SRC_NODE_LOCK(src_node); 816 817 src_node->conn++; 818 state->src.tcp_est = 1; 819 820 if (state->rule->max_src_conn && 821 state->rule->max_src_conn < 822 src_node->conn) { 823 counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1); 824 limited = true; 825 } 826 827 if (state->rule->max_src_conn_rate.limit && 828 pf_check_threshold(&src_node->conn_rate)) { 829 counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1); 830 limited = true; 831 } 832 833 if (!limited) 834 goto done; 835 836 /* Kill this state. */ 837 state->timeout = PFTM_PURGE; 838 pf_set_protostate(state, PF_PEER_BOTH, TCPS_CLOSED); 839 840 if (state->rule->overload_tbl == NULL) 841 goto done; 842 843 /* Schedule overloading and flushing task. 
*/ 844 pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT); 845 if (pfoe == NULL) 846 goto done; /* too bad :( */ 847 848 bcopy(&src_node->addr, &pfoe->addr, sizeof(pfoe->addr)); 849 pfoe->af = state->key[PF_SK_WIRE]->af; 850 pfoe->rule = state->rule; 851 pfoe->dir = state->direction; 852 PF_OVERLOADQ_LOCK(); 853 SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next); 854 PF_OVERLOADQ_UNLOCK(); 855 taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask); 856 857 done: 858 PF_SRC_NODE_UNLOCK(src_node); 859 return (limited); 860 } 861 862 static void 863 pf_overload_task(void *v, int pending) 864 { 865 struct pf_overload_head queue; 866 struct pfr_addr p; 867 struct pf_overload_entry *pfoe, *pfoe1; 868 uint32_t killed = 0; 869 870 CURVNET_SET((struct vnet *)v); 871 872 PF_OVERLOADQ_LOCK(); 873 queue = V_pf_overloadqueue; 874 SLIST_INIT(&V_pf_overloadqueue); 875 PF_OVERLOADQ_UNLOCK(); 876 877 bzero(&p, sizeof(p)); 878 SLIST_FOREACH(pfoe, &queue, next) { 879 counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1); 880 if (V_pf_status.debug >= PF_DEBUG_MISC) { 881 printf("%s: blocking address ", __func__); 882 pf_print_host(&pfoe->addr, 0, pfoe->af); 883 printf("\n"); 884 } 885 886 p.pfra_af = pfoe->af; 887 switch (pfoe->af) { 888 #ifdef INET 889 case AF_INET: 890 p.pfra_net = 32; 891 p.pfra_ip4addr = pfoe->addr.v4; 892 break; 893 #endif /* INET */ 894 #ifdef INET6 895 case AF_INET6: 896 p.pfra_net = 128; 897 p.pfra_ip6addr = pfoe->addr.v6; 898 break; 899 #endif /* INET6 */ 900 default: 901 unhandled_af(pfoe->af); 902 } 903 904 PF_RULES_WLOCK(); 905 pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second); 906 PF_RULES_WUNLOCK(); 907 } 908 909 /* 910 * Remove those entries, that don't need flushing. 
911 */ 912 SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) 913 if (pfoe->rule->flush == 0) { 914 SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next); 915 free(pfoe, M_PFTEMP); 916 } else 917 counter_u64_add( 918 V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1); 919 920 /* If nothing to flush, return. */ 921 if (SLIST_EMPTY(&queue)) { 922 CURVNET_RESTORE(); 923 return; 924 } 925 926 for (int i = 0; i <= V_pf_hashmask; i++) { 927 struct pf_idhash *ih = &V_pf_idhash[i]; 928 struct pf_state_key *sk; 929 struct pf_kstate *s; 930 931 PF_HASHROW_LOCK(ih); 932 LIST_FOREACH(s, &ih->states, entry) { 933 sk = s->key[PF_SK_WIRE]; 934 SLIST_FOREACH(pfoe, &queue, next) 935 if (sk->af == pfoe->af && 936 ((pfoe->rule->flush & PF_FLUSH_GLOBAL) || 937 pfoe->rule == s->rule) && 938 ((pfoe->dir == PF_OUT && 939 PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) || 940 (pfoe->dir == PF_IN && 941 PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) { 942 s->timeout = PFTM_PURGE; 943 pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED); 944 killed++; 945 } 946 } 947 PF_HASHROW_UNLOCK(ih); 948 } 949 SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1) 950 free(pfoe, M_PFTEMP); 951 if (V_pf_status.debug >= PF_DEBUG_MISC) 952 printf("%s: %u states killed", __func__, killed); 953 954 CURVNET_RESTORE(); 955 } 956 957 /* 958 * On node found always returns locked. On not found its configurable. 
959 */ 960 struct pf_ksrc_node * 961 pf_find_src_node(struct pf_addr *src, struct pf_krule *rule, sa_family_t af, 962 struct pf_srchash **sh, pf_sn_types_t sn_type, bool returnlocked) 963 { 964 struct pf_ksrc_node *n; 965 966 counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1); 967 968 *sh = &V_pf_srchash[pf_hashsrc(src, af)]; 969 PF_HASHROW_LOCK(*sh); 970 LIST_FOREACH(n, &(*sh)->nodes, entry) 971 if (n->rule == rule && n->af == af && n->type == sn_type && 972 ((af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr) || 973 (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0))) 974 break; 975 976 if (n == NULL && !returnlocked) 977 PF_HASHROW_UNLOCK(*sh); 978 979 return (n); 980 } 981 982 bool 983 pf_src_node_exists(struct pf_ksrc_node **sn, struct pf_srchash *sh) 984 { 985 struct pf_ksrc_node *cur; 986 987 if ((*sn) == NULL) 988 return (false); 989 990 KASSERT(sh != NULL, ("%s: sh is NULL", __func__)); 991 992 counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1); 993 PF_HASHROW_LOCK(sh); 994 LIST_FOREACH(cur, &(sh->nodes), entry) { 995 if (cur == (*sn) && 996 cur->expire != 1) /* Ignore nodes being killed */ 997 return (true); 998 } 999 PF_HASHROW_UNLOCK(sh); 1000 (*sn) = NULL; 1001 return (false); 1002 } 1003 1004 static void 1005 pf_free_src_node(struct pf_ksrc_node *sn) 1006 { 1007 1008 for (int i = 0; i < 2; i++) { 1009 counter_u64_free(sn->bytes[i]); 1010 counter_u64_free(sn->packets[i]); 1011 } 1012 counter_rate_free(sn->conn_rate.cr); 1013 uma_zfree(V_pf_sources_z, sn); 1014 } 1015 1016 static u_short 1017 pf_insert_src_node(struct pf_ksrc_node *sns[PF_SN_MAX], 1018 struct pf_srchash *snhs[PF_SN_MAX], struct pf_krule *rule, 1019 struct pf_addr *src, sa_family_t af, struct pf_addr *raddr, 1020 struct pfi_kkif *rkif, pf_sn_types_t sn_type) 1021 { 1022 u_short reason = 0; 1023 struct pf_krule *r_track = rule; 1024 struct pf_ksrc_node **sn = &(sns[sn_type]); 1025 struct pf_srchash **sh = &(snhs[sn_type]); 1026 1027 KASSERT(sn_type != 
PF_SN_LIMIT || (raddr == NULL && rkif == NULL), 1028 ("%s: raddr and rkif must be NULL for PF_SN_LIMIT", __func__)); 1029 1030 KASSERT(sn_type != PF_SN_LIMIT || (rule->rule_flag & PFRULE_SRCTRACK), 1031 ("%s: PF_SN_LIMIT only valid for rules with PFRULE_SRCTRACK", __func__)); 1032 1033 /* 1034 * XXX: There could be a KASSERT for 1035 * sn_type == PF_SN_LIMIT || (pool->opts & PF_POOL_STICKYADDR) 1036 * but we'd need to pass pool *only* for this KASSERT. 1037 */ 1038 1039 if ( (rule->rule_flag & PFRULE_SRCTRACK) && 1040 !(rule->rule_flag & PFRULE_RULESRCTRACK)) 1041 r_track = &V_pf_default_rule; 1042 1043 /* 1044 * Request the sh to always be locked, as we might insert a new sn. 1045 */ 1046 if (*sn == NULL) 1047 *sn = pf_find_src_node(src, r_track, af, sh, sn_type, true); 1048 1049 if (*sn == NULL) { 1050 PF_HASHROW_ASSERT(*sh); 1051 1052 if (sn_type == PF_SN_LIMIT && rule->max_src_nodes && 1053 counter_u64_fetch(r_track->src_nodes[sn_type]) >= rule->max_src_nodes) { 1054 counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES], 1); 1055 reason = PFRES_SRCLIMIT; 1056 goto done; 1057 } 1058 1059 (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO); 1060 if ((*sn) == NULL) { 1061 reason = PFRES_MEMORY; 1062 goto done; 1063 } 1064 1065 for (int i = 0; i < 2; i++) { 1066 (*sn)->bytes[i] = counter_u64_alloc(M_NOWAIT); 1067 (*sn)->packets[i] = counter_u64_alloc(M_NOWAIT); 1068 1069 if ((*sn)->bytes[i] == NULL || (*sn)->packets[i] == NULL) { 1070 pf_free_src_node(*sn); 1071 reason = PFRES_MEMORY; 1072 goto done; 1073 } 1074 } 1075 1076 if (sn_type == PF_SN_LIMIT) 1077 if (! 
pf_init_threshold(&(*sn)->conn_rate, 1078 rule->max_src_conn_rate.limit, 1079 rule->max_src_conn_rate.seconds)) { 1080 pf_free_src_node(*sn); 1081 reason = PFRES_MEMORY; 1082 goto done; 1083 } 1084 1085 MPASS((*sn)->lock == NULL); 1086 (*sn)->lock = &(*sh)->lock; 1087 1088 (*sn)->af = af; 1089 (*sn)->rule = r_track; 1090 pf_addrcpy(&(*sn)->addr, src, af); 1091 if (raddr != NULL) 1092 pf_addrcpy(&(*sn)->raddr, raddr, af); 1093 (*sn)->rkif = rkif; 1094 LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry); 1095 (*sn)->creation = time_uptime; 1096 (*sn)->ruletype = rule->action; 1097 (*sn)->type = sn_type; 1098 counter_u64_add(r_track->src_nodes[sn_type], 1); 1099 counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1); 1100 } else { 1101 if (sn_type == PF_SN_LIMIT && rule->max_src_states && 1102 (*sn)->states >= rule->max_src_states) { 1103 counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES], 1104 1); 1105 reason = PFRES_SRCLIMIT; 1106 goto done; 1107 } 1108 } 1109 done: 1110 if (reason == 0) 1111 (*sn)->states++; 1112 else 1113 (*sn) = NULL; 1114 1115 PF_HASHROW_UNLOCK(*sh); 1116 return (reason); 1117 } 1118 1119 void 1120 pf_unlink_src_node(struct pf_ksrc_node *src) 1121 { 1122 PF_SRC_NODE_LOCK_ASSERT(src); 1123 1124 LIST_REMOVE(src, entry); 1125 if (src->rule) 1126 counter_u64_add(src->rule->src_nodes[src->type], -1); 1127 } 1128 1129 u_int 1130 pf_free_src_nodes(struct pf_ksrc_node_list *head) 1131 { 1132 struct pf_ksrc_node *sn, *tmp; 1133 u_int count = 0; 1134 1135 LIST_FOREACH_SAFE(sn, head, entry, tmp) { 1136 pf_free_src_node(sn); 1137 count++; 1138 } 1139 1140 counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], count); 1141 1142 return (count); 1143 } 1144 1145 void 1146 pf_mtag_initialize(void) 1147 { 1148 1149 pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) + 1150 sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL, 1151 UMA_ALIGN_PTR, 0); 1152 } 1153 1154 /* Per-vnet data storage structures initialization. 
*/ 1155 void 1156 pf_initialize(void) 1157 { 1158 struct pf_keyhash *kh; 1159 struct pf_idhash *ih; 1160 struct pf_srchash *sh; 1161 struct pf_udpendpointhash *uh; 1162 u_int i; 1163 1164 if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize)) 1165 V_pf_hashsize = PF_HASHSIZ; 1166 if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize)) 1167 V_pf_srchashsize = PF_SRCHASHSIZ; 1168 if (V_pf_udpendpointhashsize == 0 || !powerof2(V_pf_udpendpointhashsize)) 1169 V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ; 1170 1171 V_pf_hashseed = arc4random(); 1172 1173 /* States and state keys storage. */ 1174 V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_kstate), 1175 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 1176 V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z; 1177 uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT); 1178 uma_zone_set_warning(V_pf_state_z, "PF states limit reached"); 1179 1180 V_pf_state_key_z = uma_zcreate("pf state keys", 1181 sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL, 1182 UMA_ALIGN_PTR, 0); 1183 1184 V_pf_keyhash = mallocarray(V_pf_hashsize, sizeof(struct pf_keyhash), 1185 M_PFHASH, M_NOWAIT | M_ZERO); 1186 V_pf_idhash = mallocarray(V_pf_hashsize, sizeof(struct pf_idhash), 1187 M_PFHASH, M_NOWAIT | M_ZERO); 1188 if (V_pf_keyhash == NULL || V_pf_idhash == NULL) { 1189 printf("pf: Unable to allocate memory for " 1190 "state_hashsize %lu.\n", V_pf_hashsize); 1191 1192 free(V_pf_keyhash, M_PFHASH); 1193 free(V_pf_idhash, M_PFHASH); 1194 1195 V_pf_hashsize = PF_HASHSIZ; 1196 V_pf_keyhash = mallocarray(V_pf_hashsize, 1197 sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO); 1198 V_pf_idhash = mallocarray(V_pf_hashsize, 1199 sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO); 1200 } 1201 1202 V_pf_hashmask = V_pf_hashsize - 1; 1203 for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask; 1204 i++, kh++, ih++) { 1205 mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK); 1206 mtx_init(&ih->lock, "pf_idhash", NULL, 
MTX_DEF); 1207 } 1208 1209 /* Source nodes. */ 1210 V_pf_sources_z = uma_zcreate("pf source nodes", 1211 sizeof(struct pf_ksrc_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1212 0); 1213 V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z; 1214 uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT); 1215 uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached"); 1216 1217 V_pf_srchash = mallocarray(V_pf_srchashsize, 1218 sizeof(struct pf_srchash), M_PFHASH, M_NOWAIT | M_ZERO); 1219 if (V_pf_srchash == NULL) { 1220 printf("pf: Unable to allocate memory for " 1221 "source_hashsize %lu.\n", V_pf_srchashsize); 1222 1223 V_pf_srchashsize = PF_SRCHASHSIZ; 1224 V_pf_srchash = mallocarray(V_pf_srchashsize, 1225 sizeof(struct pf_srchash), M_PFHASH, M_WAITOK | M_ZERO); 1226 } 1227 1228 V_pf_srchashmask = V_pf_srchashsize - 1; 1229 for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) 1230 mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF); 1231 1232 1233 /* UDP endpoint mappings. */ 1234 V_pf_udp_mapping_z = uma_zcreate("pf UDP mappings", 1235 sizeof(struct pf_udp_mapping), NULL, NULL, NULL, NULL, 1236 UMA_ALIGN_PTR, 0); 1237 V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize, 1238 sizeof(struct pf_udpendpointhash), M_PFHASH, M_NOWAIT | M_ZERO); 1239 if (V_pf_udpendpointhash == NULL) { 1240 printf("pf: Unable to allocate memory for " 1241 "udpendpoint_hashsize %lu.\n", V_pf_udpendpointhashsize); 1242 1243 V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ; 1244 V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize, 1245 sizeof(struct pf_udpendpointhash), M_PFHASH, M_WAITOK | M_ZERO); 1246 } 1247 1248 V_pf_udpendpointhashmask = V_pf_udpendpointhashsize - 1; 1249 for (i = 0, uh = V_pf_udpendpointhash; 1250 i <= V_pf_udpendpointhashmask; 1251 i++, uh++) { 1252 mtx_init(&uh->lock, "pf_udpendpointhash", NULL, 1253 MTX_DEF | MTX_DUPOK); 1254 } 1255 1256 /* ALTQ */ 1257 TAILQ_INIT(&V_pf_altqs[0]); 1258 TAILQ_INIT(&V_pf_altqs[1]); 1259 
TAILQ_INIT(&V_pf_altqs[2]); 1260 TAILQ_INIT(&V_pf_altqs[3]); 1261 TAILQ_INIT(&V_pf_pabuf[0]); 1262 TAILQ_INIT(&V_pf_pabuf[1]); 1263 TAILQ_INIT(&V_pf_pabuf[2]); 1264 V_pf_altqs_active = &V_pf_altqs[0]; 1265 V_pf_altq_ifs_active = &V_pf_altqs[1]; 1266 V_pf_altqs_inactive = &V_pf_altqs[2]; 1267 V_pf_altq_ifs_inactive = &V_pf_altqs[3]; 1268 1269 /* Send & overload+flush queues. */ 1270 STAILQ_INIT(&V_pf_sendqueue); 1271 SLIST_INIT(&V_pf_overloadqueue); 1272 TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet); 1273 1274 /* Unlinked, but may be referenced rules. */ 1275 TAILQ_INIT(&V_pf_unlinked_rules); 1276 } 1277 1278 void 1279 pf_mtag_cleanup(void) 1280 { 1281 1282 uma_zdestroy(pf_mtag_z); 1283 } 1284 1285 void 1286 pf_cleanup(void) 1287 { 1288 struct pf_keyhash *kh; 1289 struct pf_idhash *ih; 1290 struct pf_srchash *sh; 1291 struct pf_udpendpointhash *uh; 1292 struct pf_send_entry *pfse, *next; 1293 u_int i; 1294 1295 for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; 1296 i <= V_pf_hashmask; 1297 i++, kh++, ih++) { 1298 KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty", 1299 __func__)); 1300 KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty", 1301 __func__)); 1302 mtx_destroy(&kh->lock); 1303 mtx_destroy(&ih->lock); 1304 } 1305 free(V_pf_keyhash, M_PFHASH); 1306 free(V_pf_idhash, M_PFHASH); 1307 1308 for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { 1309 KASSERT(LIST_EMPTY(&sh->nodes), 1310 ("%s: source node hash not empty", __func__)); 1311 mtx_destroy(&sh->lock); 1312 } 1313 free(V_pf_srchash, M_PFHASH); 1314 1315 for (i = 0, uh = V_pf_udpendpointhash; 1316 i <= V_pf_udpendpointhashmask; 1317 i++, uh++) { 1318 KASSERT(LIST_EMPTY(&uh->endpoints), 1319 ("%s: udp endpoint hash not empty", __func__)); 1320 mtx_destroy(&uh->lock); 1321 } 1322 free(V_pf_udpendpointhash, M_PFHASH); 1323 1324 STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) { 1325 m_freem(pfse->pfse_m); 1326 free(pfse, M_PFTEMP); 1327 } 1328 
MPASS(RB_EMPTY(&V_pf_sctp_endpoints)); 1329 1330 uma_zdestroy(V_pf_sources_z); 1331 uma_zdestroy(V_pf_state_z); 1332 uma_zdestroy(V_pf_state_key_z); 1333 uma_zdestroy(V_pf_udp_mapping_z); 1334 } 1335 1336 static int 1337 pf_mtag_uminit(void *mem, int size, int how) 1338 { 1339 struct m_tag *t; 1340 1341 t = (struct m_tag *)mem; 1342 t->m_tag_cookie = MTAG_ABI_COMPAT; 1343 t->m_tag_id = PACKET_TAG_PF; 1344 t->m_tag_len = sizeof(struct pf_mtag); 1345 t->m_tag_free = pf_mtag_free; 1346 1347 return (0); 1348 } 1349 1350 static void 1351 pf_mtag_free(struct m_tag *t) 1352 { 1353 1354 uma_zfree(pf_mtag_z, t); 1355 } 1356 1357 struct pf_mtag * 1358 pf_get_mtag(struct mbuf *m) 1359 { 1360 struct m_tag *mtag; 1361 1362 if ((mtag = m_tag_find(m, PACKET_TAG_PF, NULL)) != NULL) 1363 return ((struct pf_mtag *)(mtag + 1)); 1364 1365 mtag = uma_zalloc(pf_mtag_z, M_NOWAIT); 1366 if (mtag == NULL) 1367 return (NULL); 1368 bzero(mtag + 1, sizeof(struct pf_mtag)); 1369 m_tag_prepend(m, mtag); 1370 1371 return ((struct pf_mtag *)(mtag + 1)); 1372 } 1373 1374 static int 1375 pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks, 1376 struct pf_kstate *s) 1377 { 1378 struct pf_keyhash *khs, *khw, *kh; 1379 struct pf_state_key *sk, *cur; 1380 struct pf_kstate *si, *olds = NULL; 1381 int idx; 1382 1383 NET_EPOCH_ASSERT(); 1384 KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); 1385 KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__)); 1386 KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__)); 1387 1388 /* 1389 * We need to lock hash slots of both keys. To avoid deadlock 1390 * we always lock the slot with lower address first. Unlock order 1391 * isn't important. 1392 * 1393 * We also need to lock ID hash slot before dropping key 1394 * locks. On success we return with ID hash slot locked. 
1395 */ 1396 1397 if (skw == sks) { 1398 khs = khw = &V_pf_keyhash[pf_hashkey(skw)]; 1399 PF_HASHROW_LOCK(khs); 1400 } else { 1401 khs = &V_pf_keyhash[pf_hashkey(sks)]; 1402 khw = &V_pf_keyhash[pf_hashkey(skw)]; 1403 if (khs == khw) { 1404 PF_HASHROW_LOCK(khs); 1405 } else if (khs < khw) { 1406 PF_HASHROW_LOCK(khs); 1407 PF_HASHROW_LOCK(khw); 1408 } else { 1409 PF_HASHROW_LOCK(khw); 1410 PF_HASHROW_LOCK(khs); 1411 } 1412 } 1413 1414 #define KEYS_UNLOCK() do { \ 1415 if (khs != khw) { \ 1416 PF_HASHROW_UNLOCK(khs); \ 1417 PF_HASHROW_UNLOCK(khw); \ 1418 } else \ 1419 PF_HASHROW_UNLOCK(khs); \ 1420 } while (0) 1421 1422 /* 1423 * First run: start with wire key. 1424 */ 1425 sk = skw; 1426 kh = khw; 1427 idx = PF_SK_WIRE; 1428 1429 MPASS(s->lock == NULL); 1430 s->lock = &V_pf_idhash[PF_IDHASH(s)].lock; 1431 1432 keyattach: 1433 LIST_FOREACH(cur, &kh->keys, entry) 1434 if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0) 1435 break; 1436 1437 if (cur != NULL) { 1438 /* Key exists. Check for same kif, if none, add to key. */ 1439 TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) { 1440 struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)]; 1441 1442 PF_HASHROW_LOCK(ih); 1443 if (si->kif == s->kif && 1444 ((si->key[PF_SK_WIRE]->af == sk->af && 1445 si->direction == s->direction) || 1446 (si->key[PF_SK_WIRE]->af != 1447 si->key[PF_SK_STACK]->af && 1448 sk->af == si->key[PF_SK_STACK]->af && 1449 si->direction != s->direction))) { 1450 bool reuse = false; 1451 1452 if (sk->proto == IPPROTO_TCP && 1453 si->src.state >= TCPS_FIN_WAIT_2 && 1454 si->dst.state >= TCPS_FIN_WAIT_2) 1455 reuse = true; 1456 1457 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1458 printf("pf: %s key attach " 1459 "%s on %s: ", 1460 (idx == PF_SK_WIRE) ? 1461 "wire" : "stack", 1462 reuse ? "reuse" : "failed", 1463 s->kif->pfik_name); 1464 pf_print_state_parts(s, 1465 (idx == PF_SK_WIRE) ? 1466 sk : NULL, 1467 (idx == PF_SK_STACK) ? 
1468 sk : NULL); 1469 printf(", existing: "); 1470 pf_print_state_parts(si, 1471 (idx == PF_SK_WIRE) ? 1472 sk : NULL, 1473 (idx == PF_SK_STACK) ? 1474 sk : NULL); 1475 printf("\n"); 1476 } 1477 1478 if (reuse) { 1479 /* 1480 * New state matches an old >FIN_WAIT_2 1481 * state. We can't drop key hash locks, 1482 * thus we can't unlink it properly. 1483 * 1484 * As a workaround we drop it into 1485 * TCPS_CLOSED state, schedule purge 1486 * ASAP and push it into the very end 1487 * of the slot TAILQ, so that it won't 1488 * conflict with our new state. 1489 */ 1490 pf_set_protostate(si, PF_PEER_BOTH, 1491 TCPS_CLOSED); 1492 si->timeout = PFTM_PURGE; 1493 olds = si; 1494 } else { 1495 s->timeout = PFTM_UNLINKED; 1496 if (idx == PF_SK_STACK) 1497 /* 1498 * Remove the wire key from 1499 * the hash. Other threads 1500 * can't be referencing it 1501 * because we still hold the 1502 * hash lock. 1503 */ 1504 pf_state_key_detach(s, 1505 PF_SK_WIRE); 1506 PF_HASHROW_UNLOCK(ih); 1507 KEYS_UNLOCK(); 1508 if (idx == PF_SK_WIRE) 1509 /* 1510 * We've not inserted either key. 1511 * Free both. 1512 */ 1513 uma_zfree(V_pf_state_key_z, skw); 1514 if (skw != sks) 1515 uma_zfree( 1516 V_pf_state_key_z, 1517 sks); 1518 return (EEXIST); /* collision! */ 1519 } 1520 } 1521 PF_HASHROW_UNLOCK(ih); 1522 } 1523 uma_zfree(V_pf_state_key_z, sk); 1524 s->key[idx] = cur; 1525 } else { 1526 LIST_INSERT_HEAD(&kh->keys, sk, entry); 1527 s->key[idx] = sk; 1528 } 1529 1530 stateattach: 1531 /* List is sorted, if-bound states before floating. */ 1532 if (s->kif == V_pfi_all) 1533 TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]); 1534 else 1535 TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]); 1536 1537 if (olds) { 1538 TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]); 1539 TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds, 1540 key_list[idx]); 1541 olds = NULL; 1542 } 1543 1544 /* 1545 * Attach done. See how should we (or should not?) 
1546 * attach a second key. 1547 */ 1548 if (sks == skw) { 1549 s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; 1550 idx = PF_SK_STACK; 1551 sks = NULL; 1552 goto stateattach; 1553 } else if (sks != NULL) { 1554 /* 1555 * Continue attaching with stack key. 1556 */ 1557 sk = sks; 1558 kh = khs; 1559 idx = PF_SK_STACK; 1560 sks = NULL; 1561 goto keyattach; 1562 } 1563 1564 PF_STATE_LOCK(s); 1565 KEYS_UNLOCK(); 1566 1567 KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL, 1568 ("%s failure", __func__)); 1569 1570 return (0); 1571 #undef KEYS_UNLOCK 1572 } 1573 1574 static void 1575 pf_detach_state(struct pf_kstate *s) 1576 { 1577 struct pf_state_key *sks = s->key[PF_SK_STACK]; 1578 struct pf_keyhash *kh; 1579 1580 NET_EPOCH_ASSERT(); 1581 MPASS(s->timeout >= PFTM_MAX); 1582 1583 pf_sctp_multihome_detach_addr(s); 1584 1585 if ((s->state_flags & PFSTATE_PFLOW) && V_pflow_export_state_ptr) 1586 V_pflow_export_state_ptr(s); 1587 1588 if (sks != NULL) { 1589 kh = &V_pf_keyhash[pf_hashkey(sks)]; 1590 PF_HASHROW_LOCK(kh); 1591 if (s->key[PF_SK_STACK] != NULL) 1592 pf_state_key_detach(s, PF_SK_STACK); 1593 /* 1594 * If both point to same key, then we are done. 
1595 */ 1596 if (sks == s->key[PF_SK_WIRE]) { 1597 pf_state_key_detach(s, PF_SK_WIRE); 1598 PF_HASHROW_UNLOCK(kh); 1599 return; 1600 } 1601 PF_HASHROW_UNLOCK(kh); 1602 } 1603 1604 if (s->key[PF_SK_WIRE] != NULL) { 1605 kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])]; 1606 PF_HASHROW_LOCK(kh); 1607 if (s->key[PF_SK_WIRE] != NULL) 1608 pf_state_key_detach(s, PF_SK_WIRE); 1609 PF_HASHROW_UNLOCK(kh); 1610 } 1611 } 1612 1613 static void 1614 pf_state_key_detach(struct pf_kstate *s, int idx) 1615 { 1616 struct pf_state_key *sk = s->key[idx]; 1617 #ifdef INVARIANTS 1618 struct pf_keyhash *kh = &V_pf_keyhash[pf_hashkey(sk)]; 1619 1620 PF_HASHROW_ASSERT(kh); 1621 #endif /* INVARIANTS */ 1622 TAILQ_REMOVE(&sk->states[idx], s, key_list[idx]); 1623 s->key[idx] = NULL; 1624 1625 if (TAILQ_EMPTY(&sk->states[0]) && TAILQ_EMPTY(&sk->states[1])) { 1626 LIST_REMOVE(sk, entry); 1627 uma_zfree(V_pf_state_key_z, sk); 1628 } 1629 } 1630 1631 static int 1632 pf_state_key_ctor(void *mem, int size, void *arg, int flags) 1633 { 1634 struct pf_state_key *sk = mem; 1635 1636 bzero(sk, sizeof(struct pf_state_key_cmp)); 1637 TAILQ_INIT(&sk->states[PF_SK_WIRE]); 1638 TAILQ_INIT(&sk->states[PF_SK_STACK]); 1639 1640 return (0); 1641 } 1642 1643 static int 1644 pf_state_key_addr_setup(struct pf_pdesc *pd, 1645 struct pf_state_key_cmp *key, int multi) 1646 { 1647 struct pf_addr *saddr = pd->src; 1648 struct pf_addr *daddr = pd->dst; 1649 #ifdef INET6 1650 struct nd_neighbor_solicit nd; 1651 struct pf_addr *target; 1652 u_short action, reason; 1653 1654 if (pd->af == AF_INET || pd->proto != IPPROTO_ICMPV6) 1655 goto copy; 1656 1657 switch (pd->hdr.icmp6.icmp6_type) { 1658 case ND_NEIGHBOR_SOLICIT: 1659 if (multi) 1660 return (-1); 1661 if (!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af)) 1662 return (-1); 1663 target = (struct pf_addr *)&nd.nd_ns_target; 1664 daddr = target; 1665 break; 1666 case ND_NEIGHBOR_ADVERT: 1667 if (multi) 1668 return (-1); 1669 if 
(!pf_pull_hdr(pd->m, pd->off, &nd, sizeof(nd), &action, &reason, pd->af)) 1670 return (-1); 1671 target = (struct pf_addr *)&nd.nd_ns_target; 1672 saddr = target; 1673 if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) { 1674 key->addr[pd->didx].addr32[0] = 0; 1675 key->addr[pd->didx].addr32[1] = 0; 1676 key->addr[pd->didx].addr32[2] = 0; 1677 key->addr[pd->didx].addr32[3] = 0; 1678 daddr = NULL; /* overwritten */ 1679 } 1680 break; 1681 default: 1682 if (multi) { 1683 key->addr[pd->sidx].addr32[0] = IPV6_ADDR_INT32_MLL; 1684 key->addr[pd->sidx].addr32[1] = 0; 1685 key->addr[pd->sidx].addr32[2] = 0; 1686 key->addr[pd->sidx].addr32[3] = IPV6_ADDR_INT32_ONE; 1687 saddr = NULL; /* overwritten */ 1688 } 1689 } 1690 copy: 1691 #endif /* INET6 */ 1692 if (saddr) 1693 pf_addrcpy(&key->addr[pd->sidx], saddr, pd->af); 1694 if (daddr) 1695 pf_addrcpy(&key->addr[pd->didx], daddr, pd->af); 1696 1697 return (0); 1698 } 1699 1700 int 1701 pf_state_key_setup(struct pf_pdesc *pd, u_int16_t sport, u_int16_t dport, 1702 struct pf_state_key **sk, struct pf_state_key **nk) 1703 { 1704 *sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 1705 if (*sk == NULL) 1706 return (ENOMEM); 1707 1708 if (pf_state_key_addr_setup(pd, (struct pf_state_key_cmp *)*sk, 1709 0)) { 1710 uma_zfree(V_pf_state_key_z, *sk); 1711 *sk = NULL; 1712 return (ENOMEM); 1713 } 1714 1715 (*sk)->port[pd->sidx] = sport; 1716 (*sk)->port[pd->didx] = dport; 1717 (*sk)->proto = pd->proto; 1718 (*sk)->af = pd->af; 1719 1720 *nk = pf_state_key_clone(*sk); 1721 if (*nk == NULL) { 1722 uma_zfree(V_pf_state_key_z, *sk); 1723 *sk = NULL; 1724 return (ENOMEM); 1725 } 1726 1727 if (pd->af != pd->naf) { 1728 (*sk)->port[pd->sidx] = pd->osport; 1729 (*sk)->port[pd->didx] = pd->odport; 1730 1731 (*nk)->af = pd->naf; 1732 1733 /* 1734 * We're overwriting an address here, so potentially there's bits of an IPv6 1735 * address left in here. Clear that out first. 
1736 */ 1737 bzero(&(*nk)->addr[0], sizeof((*nk)->addr[0])); 1738 bzero(&(*nk)->addr[1], sizeof((*nk)->addr[1])); 1739 if (pd->dir == PF_IN) { 1740 pf_addrcpy(&(*nk)->addr[pd->didx], &pd->nsaddr, 1741 pd->naf); 1742 pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->ndaddr, 1743 pd->naf); 1744 (*nk)->port[pd->didx] = pd->nsport; 1745 (*nk)->port[pd->sidx] = pd->ndport; 1746 } else { 1747 pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->nsaddr, 1748 pd->naf); 1749 pf_addrcpy(&(*nk)->addr[pd->didx], &pd->ndaddr, 1750 pd->naf); 1751 (*nk)->port[pd->sidx] = pd->nsport; 1752 (*nk)->port[pd->didx] = pd->ndport; 1753 } 1754 1755 switch (pd->proto) { 1756 case IPPROTO_ICMP: 1757 (*nk)->proto = IPPROTO_ICMPV6; 1758 break; 1759 case IPPROTO_ICMPV6: 1760 (*nk)->proto = IPPROTO_ICMP; 1761 break; 1762 default: 1763 (*nk)->proto = pd->proto; 1764 } 1765 } 1766 1767 return (0); 1768 } 1769 1770 struct pf_state_key * 1771 pf_state_key_clone(const struct pf_state_key *orig) 1772 { 1773 struct pf_state_key *sk; 1774 1775 sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 1776 if (sk == NULL) 1777 return (NULL); 1778 1779 bcopy(orig, sk, sizeof(struct pf_state_key_cmp)); 1780 1781 return (sk); 1782 } 1783 1784 int 1785 pf_state_insert(struct pfi_kkif *kif, struct pfi_kkif *orig_kif, 1786 struct pf_state_key *skw, struct pf_state_key *sks, struct pf_kstate *s) 1787 { 1788 struct pf_idhash *ih; 1789 struct pf_kstate *cur; 1790 int error; 1791 1792 NET_EPOCH_ASSERT(); 1793 1794 KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]), 1795 ("%s: sks not pristine", __func__)); 1796 KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]), 1797 ("%s: skw not pristine", __func__)); 1798 KASSERT(s->refs == 0, ("%s: state not pristine", __func__)); 1799 1800 s->kif = kif; 1801 s->orig_kif = orig_kif; 1802 1803 if (s->id == 0 && s->creatorid == 0) { 1804 s->id = alloc_unr64(&V_pf_stateid); 1805 s->id = htobe64(s->id); 1806 s->creatorid = V_pf_status.hostid; 1807 } 1808 1809 /* Returns with ID 
locked on success. */ 1810 if ((error = pf_state_key_attach(skw, sks, s)) != 0) 1811 return (error); 1812 skw = sks = NULL; 1813 1814 ih = &V_pf_idhash[PF_IDHASH(s)]; 1815 PF_HASHROW_ASSERT(ih); 1816 LIST_FOREACH(cur, &ih->states, entry) 1817 if (cur->id == s->id && cur->creatorid == s->creatorid) 1818 break; 1819 1820 if (cur != NULL) { 1821 s->timeout = PFTM_UNLINKED; 1822 PF_HASHROW_UNLOCK(ih); 1823 if (V_pf_status.debug >= PF_DEBUG_MISC) { 1824 printf("pf: state ID collision: " 1825 "id: %016llx creatorid: %08x\n", 1826 (unsigned long long)be64toh(s->id), 1827 ntohl(s->creatorid)); 1828 } 1829 pf_detach_state(s); 1830 return (EEXIST); 1831 } 1832 LIST_INSERT_HEAD(&ih->states, s, entry); 1833 /* One for keys, one for ID hash. */ 1834 refcount_init(&s->refs, 2); 1835 1836 pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_INSERT], 1); 1837 if (V_pfsync_insert_state_ptr != NULL) 1838 V_pfsync_insert_state_ptr(s); 1839 1840 /* Returns locked. */ 1841 return (0); 1842 } 1843 1844 /* 1845 * Find state by ID: returns with locked row on success. 1846 */ 1847 struct pf_kstate * 1848 pf_find_state_byid(uint64_t id, uint32_t creatorid) 1849 { 1850 struct pf_idhash *ih; 1851 struct pf_kstate *s; 1852 1853 pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); 1854 1855 ih = &V_pf_idhash[PF_IDHASHID(id)]; 1856 1857 PF_HASHROW_LOCK(ih); 1858 LIST_FOREACH(s, &ih->states, entry) 1859 if (s->id == id && s->creatorid == creatorid) 1860 break; 1861 1862 if (s == NULL) 1863 PF_HASHROW_UNLOCK(ih); 1864 1865 return (s); 1866 } 1867 1868 /* 1869 * Find state by key. 1870 * Returns with ID hash slot locked on success. 
1871 */ 1872 static int 1873 pf_find_state(struct pf_pdesc *pd, const struct pf_state_key_cmp *key, 1874 struct pf_kstate **state) 1875 { 1876 struct pf_keyhash *kh; 1877 struct pf_state_key *sk; 1878 struct pf_kstate *s; 1879 int idx; 1880 1881 *state = NULL; 1882 1883 pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); 1884 1885 kh = &V_pf_keyhash[pf_hashkey((const struct pf_state_key *)key)]; 1886 1887 PF_HASHROW_LOCK(kh); 1888 LIST_FOREACH(sk, &kh->keys, entry) 1889 if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) 1890 break; 1891 if (sk == NULL) { 1892 PF_HASHROW_UNLOCK(kh); 1893 return (PF_DROP); 1894 } 1895 1896 idx = (pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK); 1897 1898 /* List is sorted, if-bound states before floating ones. */ 1899 TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) 1900 if (s->kif == V_pfi_all || s->kif == pd->kif || 1901 s->orig_kif == pd->kif) { 1902 PF_STATE_LOCK(s); 1903 PF_HASHROW_UNLOCK(kh); 1904 if (__predict_false(s->timeout >= PFTM_MAX)) { 1905 /* 1906 * State is either being processed by 1907 * pf_remove_state() in an other thread, or 1908 * is scheduled for immediate expiry. 1909 */ 1910 PF_STATE_UNLOCK(s); 1911 SDT_PROBE5(pf, ip, state, lookup, pd->kif, 1912 key, (pd->dir), pd, *state); 1913 return (PF_DROP); 1914 } 1915 goto out; 1916 } 1917 1918 /* Look through the other list, in case of AF-TO */ 1919 idx = idx == PF_SK_WIRE ? PF_SK_STACK : PF_SK_WIRE; 1920 TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) { 1921 if (s->key[PF_SK_WIRE]->af == s->key[PF_SK_STACK]->af) 1922 continue; 1923 if (s->kif == V_pfi_all || s->kif == pd->kif || 1924 s->orig_kif == pd->kif) { 1925 PF_STATE_LOCK(s); 1926 PF_HASHROW_UNLOCK(kh); 1927 if (__predict_false(s->timeout >= PFTM_MAX)) { 1928 /* 1929 * State is either being processed by 1930 * pf_remove_state() in an other thread, or 1931 * is scheduled for immediate expiry. 
1932 */ 1933 PF_STATE_UNLOCK(s); 1934 SDT_PROBE5(pf, ip, state, lookup, pd->kif, 1935 key, (pd->dir), pd, NULL); 1936 return (PF_DROP); 1937 } 1938 goto out; 1939 } 1940 } 1941 1942 PF_HASHROW_UNLOCK(kh); 1943 1944 out: 1945 SDT_PROBE5(pf, ip, state, lookup, pd->kif, key, (pd->dir), pd, *state); 1946 1947 if (s == NULL || s->timeout == PFTM_PURGE) { 1948 if (s) 1949 PF_STATE_UNLOCK(s); 1950 return (PF_DROP); 1951 } 1952 1953 if ((s)->rule->pktrate.limit && pd->dir == (s)->direction) { 1954 if (pf_check_threshold(&(s)->rule->pktrate)) { 1955 PF_STATE_UNLOCK(s); 1956 return (PF_DROP); 1957 } 1958 } 1959 if (PACKET_LOOPED(pd)) { 1960 PF_STATE_UNLOCK(s); 1961 return (PF_PASS); 1962 } 1963 1964 *state = s; 1965 1966 return (PF_MATCH); 1967 } 1968 1969 /* 1970 * Returns with ID hash slot locked on success. 1971 */ 1972 struct pf_kstate * 1973 pf_find_state_all(const struct pf_state_key_cmp *key, u_int dir, int *more) 1974 { 1975 struct pf_keyhash *kh; 1976 struct pf_state_key *sk; 1977 struct pf_kstate *s, *ret = NULL; 1978 int idx, inout = 0; 1979 1980 if (more != NULL) 1981 *more = 0; 1982 1983 pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_SEARCH], 1); 1984 1985 kh = &V_pf_keyhash[pf_hashkey((const struct pf_state_key *)key)]; 1986 1987 PF_HASHROW_LOCK(kh); 1988 LIST_FOREACH(sk, &kh->keys, entry) 1989 if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0) 1990 break; 1991 if (sk == NULL) { 1992 PF_HASHROW_UNLOCK(kh); 1993 return (NULL); 1994 } 1995 switch (dir) { 1996 case PF_IN: 1997 idx = PF_SK_WIRE; 1998 break; 1999 case PF_OUT: 2000 idx = PF_SK_STACK; 2001 break; 2002 case PF_INOUT: 2003 idx = PF_SK_WIRE; 2004 inout = 1; 2005 break; 2006 default: 2007 panic("%s: dir %u", __func__, dir); 2008 } 2009 second_run: 2010 TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) { 2011 if (more == NULL) { 2012 PF_STATE_LOCK(s); 2013 PF_HASHROW_UNLOCK(kh); 2014 return (s); 2015 } 2016 2017 if (ret) 2018 (*more)++; 2019 else { 2020 ret = s; 2021 PF_STATE_LOCK(s); 2022 } 
2023 } 2024 if (inout == 1) { 2025 inout = 0; 2026 idx = PF_SK_STACK; 2027 goto second_run; 2028 } 2029 PF_HASHROW_UNLOCK(kh); 2030 2031 return (ret); 2032 } 2033 2034 /* 2035 * FIXME 2036 * This routine is inefficient -- locks the state only to unlock immediately on 2037 * return. 2038 * It is racy -- after the state is unlocked nothing stops other threads from 2039 * removing it. 2040 */ 2041 bool 2042 pf_find_state_all_exists(const struct pf_state_key_cmp *key, u_int dir) 2043 { 2044 struct pf_kstate *s; 2045 2046 s = pf_find_state_all(key, dir, NULL); 2047 if (s != NULL) { 2048 PF_STATE_UNLOCK(s); 2049 return (true); 2050 } 2051 return (false); 2052 } 2053 2054 struct pf_udp_mapping * 2055 pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port, 2056 struct pf_addr *nat_addr, uint16_t nat_port) 2057 { 2058 struct pf_udp_mapping *mapping; 2059 2060 mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO); 2061 if (mapping == NULL) 2062 return (NULL); 2063 pf_addrcpy(&mapping->endpoints[0].addr, src_addr, af); 2064 mapping->endpoints[0].port = src_port; 2065 mapping->endpoints[0].af = af; 2066 mapping->endpoints[0].mapping = mapping; 2067 pf_addrcpy(&mapping->endpoints[1].addr, nat_addr, af); 2068 mapping->endpoints[1].port = nat_port; 2069 mapping->endpoints[1].af = af; 2070 mapping->endpoints[1].mapping = mapping; 2071 refcount_init(&mapping->refs, 1); 2072 return (mapping); 2073 } 2074 2075 int 2076 pf_udp_mapping_insert(struct pf_udp_mapping *mapping) 2077 { 2078 struct pf_udpendpointhash *h0, *h1; 2079 struct pf_udp_endpoint *endpoint; 2080 int ret = EEXIST; 2081 2082 h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])]; 2083 h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])]; 2084 if (h0 == h1) { 2085 PF_HASHROW_LOCK(h0); 2086 } else if (h0 < h1) { 2087 PF_HASHROW_LOCK(h0); 2088 PF_HASHROW_LOCK(h1); 2089 } else { 2090 PF_HASHROW_LOCK(h1); 2091 PF_HASHROW_LOCK(h0); 2092 } 2093 2094 
	/*
	 * (Tail of a function that starts above this point.)
	 * Search row h0 for an endpoint that compares equal to endpoints[0];
	 * LIST_FOREACH leaves `endpoint' NULL when nothing matched.
	 */
	LIST_FOREACH(endpoint, &h0->endpoints, entry) {
		if (bcmp(endpoint, &mapping->endpoints[0],
		    sizeof(struct pf_udp_endpoint_cmp)) == 0)
			break;
	}
	/* An equal endpoint is already linked: skip the insert. */
	if (endpoint != NULL)
		goto cleanup;
	/* Same search for endpoints[1] in row h1. */
	LIST_FOREACH(endpoint, &h1->endpoints, entry) {
		if (bcmp(endpoint, &mapping->endpoints[1],
		    sizeof(struct pf_udp_endpoint_cmp)) == 0)
			break;
	}
	if (endpoint != NULL)
		goto cleanup;
	LIST_INSERT_HEAD(&h0->endpoints, &mapping->endpoints[0], entry);
	LIST_INSERT_HEAD(&h1->endpoints, &mapping->endpoints[1], entry);
	ret = 0;

cleanup:
	/* h0 and h1 can be the same row; it is then unlocked only once. */
	if (h0 != h1) {
		PF_HASHROW_UNLOCK(h0);
		PF_HASHROW_UNLOCK(h1);
	} else {
		PF_HASHROW_UNLOCK(h0);
	}
	return (ret);
}

/*
 * Drop one reference on a UDP mapping; a NULL mapping is ignored.
 * When refcount_release() reports the last reference is gone, both
 * endpoints are unlinked from their hash rows and the mapping is returned
 * to V_pf_udp_mapping_z.
 */
void
pf_udp_mapping_release(struct pf_udp_mapping *mapping)
{
	/* refcount is synchronized on the source endpoint's row lock */
	struct pf_udpendpointhash *h0, *h1;

	if (mapping == NULL)
		return;

	h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
	PF_HASHROW_LOCK(h0);
	if (refcount_release(&mapping->refs)) {
		LIST_REMOVE(&mapping->endpoints[0], entry);
		PF_HASHROW_UNLOCK(h0);
		/* The row of endpoints[1] is locked only after h0 was dropped. */
		h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
		PF_HASHROW_LOCK(h1);
		LIST_REMOVE(&mapping->endpoints[1], entry);
		PF_HASHROW_UNLOCK(h1);

		uma_zfree(V_pf_udp_mapping_z, mapping);
	} else {
		PF_HASHROW_UNLOCK(h0);
	}
}


/*
 * Look up a UDP mapping by endpoint key.  An entry matches only when it
 * compares equal to `key' and is also equal to endpoints[0] of its own
 * mapping.  On success a reference is acquired on the mapping before the
 * row lock is dropped and the mapping is returned; otherwise NULL.
 */
struct pf_udp_mapping *
pf_udp_mapping_find(struct pf_udp_endpoint_cmp *key)
{
	struct pf_udpendpointhash *uh;
	struct pf_udp_endpoint *endpoint;

	uh = &V_pf_udpendpointhash[pf_hashudpendpoint((struct pf_udp_endpoint*)key)];

	PF_HASHROW_LOCK(uh);
	LIST_FOREACH(endpoint, &uh->endpoints, entry) {
		if (bcmp(endpoint, key, sizeof(struct pf_udp_endpoint_cmp)) == 0 &&
		    bcmp(endpoint, &endpoint->mapping->endpoints[0],
		    sizeof(struct pf_udp_endpoint_cmp)) == 0)
			break;
	}
	if (endpoint == NULL) {
		PF_HASHROW_UNLOCK(uh);
		return (NULL);
	}
	refcount_acquire(&endpoint->mapping->refs);
	PF_HASHROW_UNLOCK(uh);
	return (endpoint->mapping);
}
/* END state table stuff */

/*
 * Append `pfse' to V_pf_sendqueue under the send-queue lock and schedule
 * the software interrupt registered as V_pf_swi_cookie.
 */
static void
pf_send(struct pf_send_entry *pfse)
{

	PF_SENDQ_LOCK();
	STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
	PF_SENDQ_UNLOCK();
	swi_sched(V_pf_swi_cookie, 0);
}

/*
 * Tell whether the destination address of the packet in `m' is local.
 * IPv4 asks in_localip(); IPv6 requires an interface address for the
 * destination that does not carry any IN6_IFF_NOTREADY flag.
 * The mbuf data is read as an IP header of family `af'.
 */
static bool
pf_isforlocal(struct mbuf *m, int af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		struct ip *ip = mtod(m, struct ip *);

		return (in_localip(ip->ip_dst));
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		struct in6_ifaddr *ia;
		ip6 = mtod(m, struct ip6_hdr *);
		ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false);
		if (ia == NULL)
			return (false);
		return (! (ia->ia6_flags & IN6_IFF_NOTREADY));
	}
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	return (false);
}

/*
 * Classify an ICMP / ICMPv6 message type for state matching.
 *
 * Outputs:
 *   *icmp_dir      PF_IN or PF_OUT, as chosen per type below.
 *   *virtual_type  the type used for matching, stored through htons();
 *                  a reply type is mapped onto its request type.
 *   *virtual_id    the ICMP id for echo/timestamp/info/mask messages,
 *                  0 for everything else.
 *
 * Returns 1 for the error types that match another connection's state,
 * 0 for types that match their own state.
 */
int
pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type,
    int *icmp_dir, u_int16_t *virtual_id, u_int16_t *virtual_type)
{
	/*
	 * ICMP types marked with PF_OUT are typically responses to
	 * PF_IN, and will match states in the opposite direction.
	 * PF_IN ICMP types need to match a state with that type.
	 */
	*icmp_dir = PF_OUT;

	/* Queries (and responses) */
	switch (pd->af) {
#ifdef INET
	case AF_INET:
		switch (type) {
		case ICMP_ECHO:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_ECHOREPLY:
			*virtual_type = ICMP_ECHO;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_TSTAMP:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_TSTAMPREPLY:
			*virtual_type = ICMP_TSTAMP;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_IREQ:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_IREQREPLY:
			*virtual_type = ICMP_IREQ;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_MASKREQ:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_MASKREPLY:
			*virtual_type = ICMP_MASKREQ;
			*virtual_id = pd->hdr.icmp.icmp_id;
			break;

		case ICMP_IPV6_WHEREAREYOU:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_IPV6_IAMHERE:
			*virtual_type = ICMP_IPV6_WHEREAREYOU;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ICMP_MOBILE_REGREQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_MOBILE_REGREPLY:
			*virtual_type = ICMP_MOBILE_REGREQUEST;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ICMP_ROUTERSOLICIT:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP_ROUTERADVERT:
			*virtual_type = ICMP_ROUTERSOLICIT;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		/* These ICMP types map to other connections */
		case ICMP_UNREACH:
		case ICMP_SOURCEQUENCH:
		case ICMP_REDIRECT:
		case ICMP_TIMXCEED:
		case ICMP_PARAMPROB:
			/* These will not be used, but set them anyway */
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			/* Early return: do the byte swap done at the bottom here. */
			*virtual_type = htons(*virtual_type);
			return (1); /* These types match to another state */

		/*
		 * All remaining ICMP types get their own states,
		 * and will only match in one direction.
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		switch (type) {
		case ICMP6_ECHO_REQUEST:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ICMP6_ECHO_REPLY:
			*virtual_type = ICMP6_ECHO_REQUEST;
			*virtual_id = pd->hdr.icmp6.icmp6_id;
			break;

		case MLD_LISTENER_QUERY:
		case MLD_LISTENER_REPORT: {
			/*
			 * Listener Report can be sent by clients
			 * without an associated Listener Query.
			 * In addition to that, when Report is sent as a
			 * reply to a Query its source and destination
			 * address are different.
			 */
			*icmp_dir = PF_IN;
			*virtual_type = MLD_LISTENER_QUERY;
			*virtual_id = 0;
			break;
		}
		case MLD_MTRACE:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case MLD_MTRACE_RESP:
			*virtual_type = MLD_MTRACE;
			*virtual_id = 0; /* Nothing sane to match on! */
			break;

		case ND_NEIGHBOR_SOLICIT:
			*icmp_dir = PF_IN;
			/* FALLTHROUGH */
		case ND_NEIGHBOR_ADVERT: {
			*virtual_type = ND_NEIGHBOR_SOLICIT;
			*virtual_id = 0;
			break;
		}

		/*
		 * These ICMP types map to other connections.
		 * ND_REDIRECT can't be in this list because the triggering
		 * packet header is optional.
		 */
		case ICMP6_DST_UNREACH:
		case ICMP6_PACKET_TOO_BIG:
		case ICMP6_TIME_EXCEEDED:
		case ICMP6_PARAM_PROB:
			/* These will not be used, but set them anyway */
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			/* Early return: do the byte swap done at the bottom here. */
			*virtual_type = htons(*virtual_type);
			return (1); /* These types match to another state */
		/*
		 * All remaining ICMP6 types get their own states,
		 * and will only match in one direction.
		 */
		default:
			*icmp_dir = PF_IN;
			*virtual_type = type;
			*virtual_id = 0;
			break;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}
	*virtual_type = htons(*virtual_type);
	return (0); /* These types match to their own state */
}

/*
 * Software-interrupt handler that drains V_pf_sendqueue.
 *
 * `v' is used as the struct vnet to switch to.  The queue head is copied
 * and the shared queue re-initialised under the send-queue lock, so the
 * entries are then processed without that lock, inside a net epoch
 * section.  Each entry is dispatched on pfse_type and freed afterwards;
 * an unknown type panics.
 */
void
pf_intr(void *v)
{
	struct epoch_tracker et;
	struct pf_send_head queue;
	struct pf_send_entry *pfse, *next;

	CURVNET_SET((struct vnet *)v);

	PF_SENDQ_LOCK();
	queue = V_pf_sendqueue;
	STAILQ_INIT(&V_pf_sendqueue);
	PF_SENDQ_UNLOCK();

	NET_EPOCH_ENTER(et);

	STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
		switch (pfse->pfse_type) {
#ifdef INET
		case PFSE_IP: {
			if (pf_isforlocal(pfse->pfse_m, AF_INET)) {
				KASSERT(pfse->pfse_m->m_pkthdr.rcvif == V_loif,
				    ("%s: rcvif != loif", __func__));

				/*
				 * Locally destined: mark the packet to skip
				 * the firewall and flag its checksums as
				 * already verified before handing it to
				 * ip_input().
				 */
				pfse->pfse_m->m_flags |= M_SKIP_FIREWALL;
				pfse->pfse_m->m_pkthdr.csum_flags |=
				    CSUM_IP_VALID | CSUM_IP_CHECKED |
				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
				pfse->pfse_m->m_pkthdr.csum_data = 0xffff;
				ip_input(pfse->pfse_m);
			} else {
				ip_output(pfse->pfse_m, NULL, NULL, 0, NULL,
				    NULL);
			}
			break;
		}
		case PFSE_ICMP:
			icmp_error(pfse->pfse_m, pfse->icmpopts.type,
			    pfse->icmpopts.code, 0, pfse->icmpopts.mtu);
			break;
#endif /* INET */
#ifdef INET6
		case PFSE_IP6:
			if (pf_isforlocal(pfse->pfse_m, AF_INET6)) {
				KASSERT(pfse->pfse_m->m_pkthdr.rcvif == V_loif,
				    ("%s: rcvif != loif", __func__));

				/* Same as the IPv4 case, plus M_LOOP. */
				pfse->pfse_m->m_flags |= M_SKIP_FIREWALL |
				    M_LOOP;
				pfse->pfse_m->m_pkthdr.csum_flags |=
				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
				pfse->pfse_m->m_pkthdr.csum_data = 0xffff;
				ip6_input(pfse->pfse_m);
			} else {
				ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL,
				    NULL, NULL);
			}
			break;
		case PFSE_ICMP6:
			icmp6_error(pfse->pfse_m, pfse->icmpopts.type,
			    pfse->icmpopts.code, pfse->icmpopts.mtu);
			break;
#endif /* INET6 */
		default:
			panic("%s: unknown type", __func__);
		}
		free(pfse, M_PFTEMP);
	}
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
}

/* Sleep timeout, in ticks, used by pf_purge_thread() between passes. */
#define pf_purge_thread_period (hz / 10)

#ifdef PF_WANT_32_TO_64_COUNTER
/*
 * Run pf_counter_u64_periodic() over all V_pf_status.fcounters[], but only
 * on passes where V_pf_counter_periodic_iter is a multiple of
 * pf_purge_thread_period * 10 * 60.  Asserts the rules read lock.
 */
static void
pf_status_counter_u64_periodic(void)
{

	PF_RULES_RASSERT();

	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 60)) != 0) {
		return;
	}

	for (int i = 0; i < FCNT_MAX; i++) {
		pf_counter_u64_periodic(&V_pf_status.fcounters[i]);
	}
}

/*
 * Run pf_counter_u64_periodic() over the packet/byte counters of a batch
 * of kifs.  V_pf_kifmarker is a list entry that remembers the position in
 * V_pf_allkiflist: each step processes the entry following the marker and
 * moves the marker behind it; at the end of the list the marker is moved
 * back to the head and the batch stops.  A batch is a tenth of
 * V_pf_allkifcount, but at least 5 entries.
 */
static void
pf_kif_counter_u64_periodic(void)
{
	struct pfi_kkif *kif;
	size_t r, run;

	PF_RULES_RASSERT();

	if (__predict_false(V_pf_allkifcount == 0)) {
		return;
	}

	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
		return;
	}

	run = V_pf_allkifcount / 10;
	if (run < 5)
		run = 5;

	for (r = 0; r < run; r++) {
		kif = LIST_NEXT(V_pf_kifmarker, pfik_allkiflist);
		if (kif == NULL) {
			/* End of list: rewind the marker to the head. */
			LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
			LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist);
			break;
		}

		/* Advance the marker past the entry about to be processed. */
		LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);
		LIST_INSERT_AFTER(kif, V_pf_kifmarker, pfik_allkiflist);

		for (int i = 0; i < 2; i++) {
			for (int j = 0; j < 2; j++) {
				for (int k = 0; k < 2; k++) {
					pf_counter_u64_periodic(&kif->pfik_packets[i][j][k]);
					pf_counter_u64_periodic(&kif->pfik_bytes[i][j][k]);
				}
			}
		}
	}
}

/*
 * Same marker-based batch walk as pf_kif_counter_u64_periodic(), applied
 * to V_pf_allrulelist with V_pf_rulemarker: the evaluations, packets[] and
 * bytes[] counters of each visited rule are processed.
 */
static void
pf_rule_counter_u64_periodic(void)
{
	struct pf_krule *rule;
	size_t r, run;

	PF_RULES_RASSERT();

	if (__predict_false(V_pf_allrulecount == 0)) {
		return;
	}

	if ((V_pf_counter_periodic_iter % (pf_purge_thread_period * 10 * 300)) != 0) {
		return;
	}

	run = V_pf_allrulecount / 10;
	if (run < 5)
		run = 5;

	for (r = 0; r < run; r++) {
		rule = LIST_NEXT(V_pf_rulemarker, allrulelist);
		if (rule == NULL) {
			/* End of list: rewind the marker to the head. */
			LIST_REMOVE(V_pf_rulemarker, allrulelist);
			LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist);
			break;
		}

		/* Advance the marker past the entry about to be processed. */
		LIST_REMOVE(V_pf_rulemarker, allrulelist);
		LIST_INSERT_AFTER(rule, V_pf_rulemarker, allrulelist);

		pf_counter_u64_periodic(&rule->evaluations);
		for (int i = 0; i < 2; i++) {
			pf_counter_u64_periodic(&rule->packets[i]);
			pf_counter_u64_periodic(&rule->bytes[i]);
		}
	}
}

/*
 * Per-pass entry point for the counter maintenance above: bumps
 * V_pf_counter_periodic_iter, then runs the status, kif and rule passes
 * under the rules read lock and inside
 * pf_counter_u64_critical_enter()/exit().
 */
static void
pf_counter_u64_periodic_main(void)
{
	PF_RULES_RLOCK_TRACKER;

	V_pf_counter_periodic_iter++;

	PF_RULES_RLOCK();
	pf_counter_u64_critical_enter();
	pf_status_counter_u64_periodic();
	pf_kif_counter_u64_periodic();
	pf_rule_counter_u64_periodic();
	pf_counter_u64_critical_exit();
	PF_RULES_RUNLOCK();
}
#else
/* Without PF_WANT_32_TO_64_COUNTER the periodic pass is a no-op. */
#define pf_counter_u64_periodic_main() do { } while (0)
#endif

/*
 * Purge thread body.  Holding pf_end_lock, it loops until pf_end_threads
 * becomes non-zero; each pass sleeps on the lock with a timeout of
 * pf_purge_thread_period and then visits every vnet.  On exit it
 * increments pf_end_threads, drops the lock and calls kproc_exit().
 */
void
pf_purge_thread(void *unused __unused)
{
	struct epoch_tracker et;

	VNET_ITERATOR_DECL(vnet_iter);

	sx_xlock(&pf_end_lock);
	while (pf_end_threads == 0) {
		sx_sleep(pf_purge_thread, &pf_end_lock, 0, "pftm", pf_purge_thread_period);

		VNET_LIST_RLOCK();
		NET_EPOCH_ENTER(et);
		VNET_FOREACH(vnet_iter) {
			CURVNET_SET(vnet_iter);

			/* Wait until V_pf_default_rule is initialized. */
			if (V_pf_vnet_active == 0) {
				CURVNET_RESTORE();
				continue;
			}

			pf_counter_u64_periodic_main();

			/*
			 * Process 1/interval fraction of the state
			 * table every run.
			 */
			V_pf_purge_idx =
			    pf_purge_expired_states(V_pf_purge_idx, V_pf_hashmask /
			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));

			/*
			 * Purge other expired types every
			 * PFTM_INTERVAL seconds.
			 */
			if (V_pf_purge_idx == 0) {
				/*
				 * Order is important:
				 * - states and src nodes reference rules
				 * - states and rules reference kifs
				 */
				pf_purge_expired_fragments();
				pf_purge_expired_src_nodes();
				pf_purge_unlinked_rules();
				pfi_kkif_purge();
			}
			CURVNET_RESTORE();
		}
		NET_EPOCH_EXIT(et);
		VNET_LIST_RUNLOCK();
	}

	pf_end_threads++;
	sx_xunlock(&pf_end_lock);
	kproc_exit(0);
}

/*
 * Purge everything: rules, kifs, states (all hash rows), fragments and
 * source nodes, in the order explained by the comments below.
 */
void
pf_unload_vnet_purge(void)
{

	/*
	 * To cleanse up all kifs and rules we need
	 * two runs: first one clears reference flags,
	 * then pf_purge_expired_states() doesn't
	 * raise them, and then second run frees.
	 */
	pf_purge_unlinked_rules();
	pfi_kkif_purge();

	/*
	 * Now purge everything.
	 */
	pf_purge_expired_states(0, V_pf_hashmask);
	pf_purge_fragments(UINT_MAX);
	pf_purge_expired_src_nodes();

	/*
	 * Now all kifs & rules should be unreferenced,
	 * thus should be successfully freed.
	 */
	pf_purge_unlinked_rules();
	pfi_kkif_purge();
}

/*
 * Compute the time at which `state' expires.
 *
 * PFTM_PURGE states expire immediately (time_uptime is returned).
 * Otherwise the timeout comes from the state's rule, falling back to
 * V_pf_default_rule when the rule's value is 0.  The adaptive thresholds
 * come from the rule (with the rule's own state count) when it sets
 * PFTM_ADAPTIVE_START and is not the default rule, else from the default
 * rule with the global state count.  When `end' is set and the count lies
 * above `start', the timeout is scaled by (end - states) / (end - start);
 * at or above `end' the state expires immediately.
 *
 * The result is (state->expire / 1000) + timeout, or time_uptime.
 */
u_int32_t
pf_state_expires(const struct pf_kstate *state)
{
	u_int32_t timeout;
	u_int32_t start;
	u_int32_t end;
	u_int32_t states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (time_uptime);
	KASSERT(state->timeout != PFTM_UNLINKED,
	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
	KASSERT((state->timeout < PFTM_MAX),
	    ("pf_state_expires: timeout > PFTM_MAX"));
	timeout = state->rule->timeout[state->timeout];
	if (!timeout)
		timeout = V_pf_default_rule.timeout[state->timeout];
	start = state->rule->timeout[PFTM_ADAPTIVE_START];
	if (start && state->rule != &V_pf_default_rule) {
		end = state->rule->timeout[PFTM_ADAPTIVE_END];
		states = counter_u64_fetch(state->rule->states_cur);
	} else {
		start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = V_pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states < end) {
			/* 64-bit intermediate for the multiplication. */
			timeout = (u_int64_t)timeout * (end - states) /
			    (end - start);
			return ((state->expire / 1000) + timeout);
		}
		else
			return (time_uptime);
	}
	return ((state->expire / 1000) + timeout);
}

/*
 * Walk every row of the source-node hash.  Nodes with no states whose
 * expire time has passed are unlinked and collected on a local list that
 * is freed after the walk; for the remaining nodes, PFRULE_REFS is set on
 * their rule (if any).  Finally V_pf_status.src_nodes is refreshed from
 * the zone's current item count.
 */
void
pf_purge_expired_src_nodes(void)
{
	struct pf_ksrc_node_list freelist;
	struct pf_srchash *sh;
	struct pf_ksrc_node *cur, *next;
	int i;

	LIST_INIT(&freelist);
	for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) {
		PF_HASHROW_LOCK(sh);
		LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
			if (cur->states == 0 && cur->expire <= time_uptime) {
				pf_unlink_src_node(cur);
				LIST_INSERT_HEAD(&freelist, cur, entry);
			} else if (cur->rule != NULL)
				cur->rule->rule_ref |= PFRULE_REFS;
		PF_HASHROW_UNLOCK(sh);
	}

	pf_free_src_nodes(&freelist);

	V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z);
}

/*
 * Detach state `s' from all of its source nodes.  For each non-NULL
 * s->sns[] entry, under that node's lock: the connection count is
 * decremented for the PF_SN_LIMIT node when s->src.tcp_est is set, the
 * state count is decremented, and when it reaches zero the node's expire
 * time is set to time_uptime plus the PFTM_SRC_NODE timeout (rule value,
 * or the default rule's when that is 0).  The pointer is then cleared.
 */
static void
pf_src_tree_remove_state(struct pf_kstate *s)
{
	uint32_t timeout;

	timeout = s->rule->timeout[PFTM_SRC_NODE] ?
	    s->rule->timeout[PFTM_SRC_NODE] :
	    V_pf_default_rule.timeout[PFTM_SRC_NODE];

	for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) {
		if (s->sns[sn_type] == NULL)
			continue;
		PF_SRC_NODE_LOCK(s->sns[sn_type]);
		if (sn_type == PF_SN_LIMIT && s->src.tcp_est)
			--(s->sns[sn_type]->conn);
		if (--(s->sns[sn_type]->states) == 0)
			s->sns[sn_type]->expire = time_uptime + timeout;
		PF_SRC_NODE_UNLOCK(s->sns[sn_type]);
		s->sns[sn_type] = NULL;
	}

}

/*
 * Unlink and potentially free a state.  Function may be
 * called with ID hash row locked, but always returns
 * unlocked, since it needs to go through key hash locking.
 *
 * Returns 0 when the state was already PFTM_UNLINKED, otherwise the
 * result of pf_release_staten(s, 2).
 */
int
pf_remove_state(struct pf_kstate *s)
{
	struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];

	NET_EPOCH_ASSERT();
	PF_HASHROW_ASSERT(ih);

	if (s->timeout == PFTM_UNLINKED) {
		/*
		 * State is being processed
		 * by pf_remove_state() in
		 * another thread.
		 */
		PF_HASHROW_UNLOCK(ih);
		return (0); /* XXXGL: undefined actually */
	}

	if (s->src.state == PF_TCPS_PROXY_DST) {
		/* XXX wire key the right one? */
		pf_send_tcp(s->rule, s->key[PF_SK_WIRE]->af,
		    &s->key[PF_SK_WIRE]->addr[1],
		    &s->key[PF_SK_WIRE]->addr[0],
		    s->key[PF_SK_WIRE]->port[1],
		    s->key[PF_SK_WIRE]->port[0],
		    s->src.seqhi, s->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, M_SKIP_FIREWALL, s->tag, 0,
		    s->act.rtableid);
	}

	LIST_REMOVE(s, entry);
	pf_src_tree_remove_state(s);

	if (V_pfsync_delete_state_ptr != NULL)
		V_pfsync_delete_state_ptr(s);

	STATE_DEC_COUNTERS(s);

	/* From here on other callers see the state as already unlinked. */
	s->timeout = PFTM_UNLINKED;

	/* Ensure we remove it from the list of halfopen states, if needed. */
	if (s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP)
		pf_set_protostate(s, PF_PEER_BOTH, TCPS_CLOSED);

	PF_HASHROW_UNLOCK(ih);

	pf_detach_state(s);

	pf_udp_mapping_release(s->udp_mapping);

	/* pf_state_insert() initialises refs to 2 */
	return (pf_release_staten(s, 2));
}

/*
 * Allocate a zero-filled state from V_pf_state_z; `flags' are passed to
 * uma_zalloc() with M_ZERO added.
 */
struct pf_kstate *
pf_alloc_state(int flags)
{

	return (uma_zalloc(V_pf_state_z, flags | M_ZERO));
}

/*
 * Free a state that has no references left and is PFTM_UNLINKED (both
 * asserted).  Its match_rules items are freed, pf_normalize_tcp_cleanup()
 * is called, the state is returned to V_pf_state_z and the
 * FCNT_STATE_REMOVALS counter is incremented.
 */
void
pf_free_state(struct pf_kstate *cur)
{
	struct pf_krule_item *ri;

	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
	    cur->timeout));

	while ((ri = SLIST_FIRST(&cur->match_rules))) {
		SLIST_REMOVE_HEAD(&cur->match_rules, entry);
		free(ri, M_PF_RULE_ITEM);
	}

	pf_normalize_tcp_cleanup(cur);
	uma_zfree(V_pf_state_z, cur);
	pf_counter_u64_add(&V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
}

/*
 * Called only from pf_purge_thread(), thus serialized.
2836 */ 2837 static u_int 2838 pf_purge_expired_states(u_int i, int maxcheck) 2839 { 2840 struct pf_idhash *ih; 2841 struct pf_kstate *s; 2842 struct pf_krule_item *mrm; 2843 size_t count __unused; 2844 2845 V_pf_status.states = uma_zone_get_cur(V_pf_state_z); 2846 2847 /* 2848 * Go through hash and unlink states that expire now. 2849 */ 2850 while (maxcheck > 0) { 2851 count = 0; 2852 ih = &V_pf_idhash[i]; 2853 2854 /* only take the lock if we expect to do work */ 2855 if (!LIST_EMPTY(&ih->states)) { 2856 relock: 2857 PF_HASHROW_LOCK(ih); 2858 LIST_FOREACH(s, &ih->states, entry) { 2859 if (pf_state_expires(s) <= time_uptime) { 2860 V_pf_status.states -= 2861 pf_remove_state(s); 2862 goto relock; 2863 } 2864 s->rule->rule_ref |= PFRULE_REFS; 2865 if (s->nat_rule != NULL) 2866 s->nat_rule->rule_ref |= PFRULE_REFS; 2867 if (s->anchor != NULL) 2868 s->anchor->rule_ref |= PFRULE_REFS; 2869 s->kif->pfik_flags |= PFI_IFLAG_REFS; 2870 SLIST_FOREACH(mrm, &s->match_rules, entry) 2871 mrm->r->rule_ref |= PFRULE_REFS; 2872 if (s->act.rt_kif) 2873 s->act.rt_kif->pfik_flags |= PFI_IFLAG_REFS; 2874 count++; 2875 } 2876 PF_HASHROW_UNLOCK(ih); 2877 } 2878 2879 SDT_PROBE2(pf, purge, state, rowcount, i, count); 2880 2881 /* Return when we hit end of hash. */ 2882 if (++i > V_pf_hashmask) { 2883 V_pf_status.states = uma_zone_get_cur(V_pf_state_z); 2884 return (0); 2885 } 2886 2887 maxcheck--; 2888 } 2889 2890 V_pf_status.states = uma_zone_get_cur(V_pf_state_z); 2891 2892 return (i); 2893 } 2894 2895 static void 2896 pf_purge_unlinked_rules(void) 2897 { 2898 struct pf_krulequeue tmpq; 2899 struct pf_krule *r, *r1; 2900 2901 /* 2902 * If we have overloading task pending, then we'd 2903 * better skip purging this time. There is a tiny 2904 * probability that overloading task references 2905 * an already unlinked rule. 
2906 */ 2907 PF_OVERLOADQ_LOCK(); 2908 if (!SLIST_EMPTY(&V_pf_overloadqueue)) { 2909 PF_OVERLOADQ_UNLOCK(); 2910 return; 2911 } 2912 PF_OVERLOADQ_UNLOCK(); 2913 2914 /* 2915 * Do naive mark-and-sweep garbage collecting of old rules. 2916 * Reference flag is raised by pf_purge_expired_states() 2917 * and pf_purge_expired_src_nodes(). 2918 * 2919 * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK, 2920 * use a temporary queue. 2921 */ 2922 TAILQ_INIT(&tmpq); 2923 PF_UNLNKDRULES_LOCK(); 2924 TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) { 2925 if (!(r->rule_ref & PFRULE_REFS)) { 2926 TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries); 2927 TAILQ_INSERT_TAIL(&tmpq, r, entries); 2928 } else 2929 r->rule_ref &= ~PFRULE_REFS; 2930 } 2931 PF_UNLNKDRULES_UNLOCK(); 2932 2933 if (!TAILQ_EMPTY(&tmpq)) { 2934 PF_CONFIG_LOCK(); 2935 PF_RULES_WLOCK(); 2936 TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) { 2937 TAILQ_REMOVE(&tmpq, r, entries); 2938 pf_free_rule(r); 2939 } 2940 PF_RULES_WUNLOCK(); 2941 PF_CONFIG_UNLOCK(); 2942 } 2943 } 2944 2945 void 2946 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) 2947 { 2948 switch (af) { 2949 #ifdef INET 2950 case AF_INET: { 2951 u_int32_t a = ntohl(addr->addr32[0]); 2952 printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, 2953 (a>>8)&255, a&255); 2954 if (p) { 2955 p = ntohs(p); 2956 printf(":%u", p); 2957 } 2958 break; 2959 } 2960 #endif /* INET */ 2961 #ifdef INET6 2962 case AF_INET6: { 2963 u_int16_t b; 2964 u_int8_t i, curstart, curend, maxstart, maxend; 2965 curstart = curend = maxstart = maxend = 255; 2966 for (i = 0; i < 8; i++) { 2967 if (!addr->addr16[i]) { 2968 if (curstart == 255) 2969 curstart = i; 2970 curend = i; 2971 } else { 2972 if ((curend - curstart) > 2973 (maxend - maxstart)) { 2974 maxstart = curstart; 2975 maxend = curend; 2976 } 2977 curstart = curend = 255; 2978 } 2979 } 2980 if ((curend - curstart) > 2981 (maxend - maxstart)) { 2982 maxstart = curstart; 2983 maxend = curend; 2984 } 2985 for (i 
= 0; i < 8; i++) { 2986 if (i >= maxstart && i <= maxend) { 2987 if (i == 0) 2988 printf(":"); 2989 if (i == maxend) 2990 printf(":"); 2991 } else { 2992 b = ntohs(addr->addr16[i]); 2993 printf("%x", b); 2994 if (i < 7) 2995 printf(":"); 2996 } 2997 } 2998 if (p) { 2999 p = ntohs(p); 3000 printf("[%u]", p); 3001 } 3002 break; 3003 } 3004 #endif /* INET6 */ 3005 default: 3006 unhandled_af(af); 3007 } 3008 } 3009 3010 void 3011 pf_print_state(struct pf_kstate *s) 3012 { 3013 pf_print_state_parts(s, NULL, NULL); 3014 } 3015 3016 static void 3017 pf_print_state_parts(struct pf_kstate *s, 3018 struct pf_state_key *skwp, struct pf_state_key *sksp) 3019 { 3020 struct pf_state_key *skw, *sks; 3021 u_int8_t proto, dir; 3022 3023 /* Do our best to fill these, but they're skipped if NULL */ 3024 skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); 3025 sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); 3026 proto = skw ? skw->proto : (sks ? sks->proto : 0); 3027 dir = s ? s->direction : 0; 3028 3029 switch (proto) { 3030 case IPPROTO_IPV4: 3031 printf("IPv4"); 3032 break; 3033 case IPPROTO_IPV6: 3034 printf("IPv6"); 3035 break; 3036 case IPPROTO_TCP: 3037 printf("TCP"); 3038 break; 3039 case IPPROTO_UDP: 3040 printf("UDP"); 3041 break; 3042 case IPPROTO_ICMP: 3043 printf("ICMP"); 3044 break; 3045 case IPPROTO_ICMPV6: 3046 printf("ICMPv6"); 3047 break; 3048 default: 3049 printf("%u", proto); 3050 break; 3051 } 3052 switch (dir) { 3053 case PF_IN: 3054 printf(" in"); 3055 break; 3056 case PF_OUT: 3057 printf(" out"); 3058 break; 3059 } 3060 if (skw) { 3061 printf(" wire: "); 3062 pf_print_host(&skw->addr[0], skw->port[0], skw->af); 3063 printf(" "); 3064 pf_print_host(&skw->addr[1], skw->port[1], skw->af); 3065 } 3066 if (sks) { 3067 printf(" stack: "); 3068 if (sks != skw) { 3069 pf_print_host(&sks->addr[0], sks->port[0], sks->af); 3070 printf(" "); 3071 pf_print_host(&sks->addr[1], sks->port[1], sks->af); 3072 } else 3073 printf("-"); 3074 } 3075 if (s) { 3076 if (proto == 
IPPROTO_TCP) { 3077 printf(" [lo=%u high=%u win=%u modulator=%u", 3078 s->src.seqlo, s->src.seqhi, 3079 s->src.max_win, s->src.seqdiff); 3080 if (s->src.wscale && s->dst.wscale) 3081 printf(" wscale=%u", 3082 s->src.wscale & PF_WSCALE_MASK); 3083 printf("]"); 3084 printf(" [lo=%u high=%u win=%u modulator=%u", 3085 s->dst.seqlo, s->dst.seqhi, 3086 s->dst.max_win, s->dst.seqdiff); 3087 if (s->src.wscale && s->dst.wscale) 3088 printf(" wscale=%u", 3089 s->dst.wscale & PF_WSCALE_MASK); 3090 printf("]"); 3091 } 3092 printf(" %u:%u", s->src.state, s->dst.state); 3093 if (s->rule) 3094 printf(" @%d", s->rule->nr); 3095 } 3096 } 3097 3098 void 3099 pf_print_flags(uint16_t f) 3100 { 3101 if (f) 3102 printf(" "); 3103 if (f & TH_FIN) 3104 printf("F"); 3105 if (f & TH_SYN) 3106 printf("S"); 3107 if (f & TH_RST) 3108 printf("R"); 3109 if (f & TH_PUSH) 3110 printf("P"); 3111 if (f & TH_ACK) 3112 printf("A"); 3113 if (f & TH_URG) 3114 printf("U"); 3115 if (f & TH_ECE) 3116 printf("E"); 3117 if (f & TH_CWR) 3118 printf("W"); 3119 if (f & TH_AE) 3120 printf("e"); 3121 } 3122 3123 #define PF_SET_SKIP_STEPS(i) \ 3124 do { \ 3125 while (head[i] != cur) { \ 3126 head[i]->skip[i] = cur; \ 3127 head[i] = TAILQ_NEXT(head[i], entries); \ 3128 } \ 3129 } while (0) 3130 3131 void 3132 pf_calc_skip_steps(struct pf_krulequeue *rules) 3133 { 3134 struct pf_krule *cur, *prev, *head[PF_SKIP_COUNT]; 3135 int i; 3136 3137 cur = TAILQ_FIRST(rules); 3138 prev = cur; 3139 for (i = 0; i < PF_SKIP_COUNT; ++i) 3140 head[i] = cur; 3141 while (cur != NULL) { 3142 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) 3143 PF_SET_SKIP_STEPS(PF_SKIP_IFP); 3144 if (cur->direction != prev->direction) 3145 PF_SET_SKIP_STEPS(PF_SKIP_DIR); 3146 if (cur->af != prev->af) 3147 PF_SET_SKIP_STEPS(PF_SKIP_AF); 3148 if (cur->proto != prev->proto) 3149 PF_SET_SKIP_STEPS(PF_SKIP_PROTO); 3150 if (cur->src.neg != prev->src.neg || 3151 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) 3152 
PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); 3153 if (cur->dst.neg != prev->dst.neg || 3154 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) 3155 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); 3156 if (cur->src.port[0] != prev->src.port[0] || 3157 cur->src.port[1] != prev->src.port[1] || 3158 cur->src.port_op != prev->src.port_op) 3159 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); 3160 if (cur->dst.port[0] != prev->dst.port[0] || 3161 cur->dst.port[1] != prev->dst.port[1] || 3162 cur->dst.port_op != prev->dst.port_op) 3163 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); 3164 3165 prev = cur; 3166 cur = TAILQ_NEXT(cur, entries); 3167 } 3168 for (i = 0; i < PF_SKIP_COUNT; ++i) 3169 PF_SET_SKIP_STEPS(i); 3170 } 3171 3172 int 3173 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) 3174 { 3175 if (aw1->type != aw2->type) 3176 return (1); 3177 switch (aw1->type) { 3178 case PF_ADDR_ADDRMASK: 3179 case PF_ADDR_RANGE: 3180 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) 3181 return (1); 3182 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) 3183 return (1); 3184 return (0); 3185 case PF_ADDR_DYNIFTL: 3186 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); 3187 case PF_ADDR_NONE: 3188 case PF_ADDR_NOROUTE: 3189 case PF_ADDR_URPFFAILED: 3190 return (0); 3191 case PF_ADDR_TABLE: 3192 return (aw1->p.tbl != aw2->p.tbl); 3193 default: 3194 printf("invalid address type: %d\n", aw1->type); 3195 return (1); 3196 } 3197 } 3198 3199 /** 3200 * Checksum updates are a little complicated because the checksum in the TCP/UDP 3201 * header isn't always a full checksum. In some cases (i.e. output) it's a 3202 * pseudo-header checksum, which is a partial checksum over src/dst IP 3203 * addresses, protocol number and length. 3204 * 3205 * That means we have the following cases: 3206 * * Input or forwarding: we don't have TSO, the checksum fields are full 3207 * checksums, we need to update the checksum whenever we change anything. 3208 * * Output (i.e. 
 *	the checksum is a pseudo-header checksum):
 *    x The field being updated is src/dst address or affects the length of
 *    the packet. We need to update the pseudo-header checksum (note that this
 *    checksum is not ones' complement).
 *    x Some other field is being modified (e.g. src/dst port numbers): We
 *    don't have to update anything.
 **/
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t x;

	/*
	 * Incremental update for a 16-bit word changing from `old' to `new':
	 * computed in 32 bits, then the upper half is folded back in once.
	 */
	x = cksum + old - new;
	x = (x + (x >> 16)) & 0xffff;

	/* optimise: eliminate a branch when not udp */
	/* For UDP an incoming checksum of 0 is returned unchanged... */
	if (udp && cksum == 0x0000)
		return cksum;
	/* ...and a computed 0 is replaced by 0xffff. */
	if (udp && x == 0x0000)
		x = 0xffff;

	return (u_int16_t)(x);
}

/*
 * Store byte `v' into `*f' when it differs from the current value.
 * Unless the mbuf has CSUM_DELAY_DATA or CSUM_DELAY_DATA_IPV6 set,
 * *pd->pcksum is fixed up for the change; `hi' selects whether the byte is
 * accounted for in the upper (<< 8) or lower half of its 16-bit word.
 * Returns 1 when the byte was rewritten, 0 otherwise.
 */
static int
pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
{
	int rewrite = 0;

	if (*f != v) {
		uint16_t old = htons(hi ? (*f << 8) : *f);
		uint16_t new = htons(hi ? ( v << 8) : v);

		*f = v;

		if (! (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
		    CSUM_DELAY_DATA_IPV6)))
			*pd->pcksum = pf_cksum_fixup(*pd->pcksum, old, new,
			    pd->proto == IPPROTO_UDP);

		rewrite = 1;
	}

	return (rewrite);
}

/*
 * Patch the two bytes at `f' with the bytes of `v', in memory order, via
 * pf_patch_8().  `hi' applies to the first byte and is inverted for the
 * second.  `f' is accessed bytewise.  Returns the number of bytes that
 * were rewritten (0..2).
 */
int
pf_patch_16(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
{
	int rewrite = 0;
	u_int8_t *fb = (u_int8_t *)f;
	u_int8_t *vb = (u_int8_t *)&v;

	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, !hi);

	return (rewrite);
}

/*
 * Four-byte variant of pf_patch_16(): `hi' alternates hi, !hi, hi, !hi
 * over the bytes.  Returns the number of bytes rewritten (0..4).
 */
int
pf_patch_32(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
{
	int rewrite = 0;
	u_int8_t *fb = (u_int8_t *)f;
	u_int8_t *vb = (u_int8_t *)&v;

	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, !hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, hi);
	rewrite += pf_patch_8(pd, fb++, *vb++, !hi);

	return (rewrite);
}

/*
 * Like pf_cksum_fixup(), except that `cksum' is returned unchanged when
 * the mbuf has CSUM_DELAY_DATA or CSUM_DELAY_DATA_IPV6 set.
 */
u_int16_t
pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
    u_int16_t new, u_int8_t udp)
{
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
		return (cksum);

	return (pf_cksum_fixup(cksum, old, new, udp));
}

/*
 * Change an address/port pair and fix up the checksums.
 *
 *   a / an   current address and its replacement; `*a' is overwritten only
 *            when pd->af == pd->naf.
 *   p / pn   current port and its replacement; `p' may be NULL.
 *
 * pd->pcksum is updated for the address change (and for the port change,
 * see the individual cases); for AF_INET -> AF_INET pd->ip_sum is updated
 * as well.  When the address family changes, the address words that exist
 * only on the IPv6 side are paired with 0 in the fix-up chain.
 */
static void
pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p,
    struct pf_addr *an, u_int16_t pn)
{
	struct pf_addr ao;
	u_int16_t po;
	uint8_t u = pd->virtual_proto == IPPROTO_UDP;

	MPASS(pd->pcksum);
	if (pd->af == AF_INET) {
		MPASS(pd->ip_sum);
	}

	pf_addrcpy(&ao, a, pd->af);
	if (pd->af == pd->naf)
		pf_addrcpy(a, an, pd->af);

	/*
	 * With delayed checksumming the stored value is complemented for the
	 * duration of the update and complemented back at the end.
	 */
	if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
		*pd->pcksum = ~*pd->pcksum;

	/*
	 * NOTE(review): this early return is taken after the inversion above
	 * and does not pass through the re-inversion at the bottom - confirm
	 * that this is intended for callers passing p == NULL.
	 */
	if (p == NULL) /* no port -> done. no cksum to worry about. */
		return;
	po = *p;
	*p = pn;

	switch (pd->af) {
#ifdef INET
	case AF_INET:
		switch (pd->naf) {
		case AF_INET:
			*pd->ip_sum = pf_cksum_fixup(pf_cksum_fixup(*pd->ip_sum,
			    ao.addr16[0], an->addr16[0], 0),
			    ao.addr16[1], an->addr16[1], 0);
			*p = pn;

			*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u);

			*pd->pcksum = pf_proto_cksum_fixup(pd->m, *pd->pcksum, po, pn, u);
			break;
#ifdef INET6
		case AF_INET6:
			*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    0, an->addr16[2], u),
			    0, an->addr16[3], u),
			    0, an->addr16[4], u),
			    0, an->addr16[5], u),
			    0, an->addr16[6], u),
			    0, an->addr16[7], u),
			    po, pn, u);
			break;
#endif /* INET6 */
		default:
			unhandled_af(pd->naf);
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		switch (pd->naf) {
#ifdef INET
		case AF_INET:
			*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    ao.addr16[2], 0, u),
			    ao.addr16[3], 0, u),
			    ao.addr16[4], 0, u),
			    ao.addr16[5], 0, u),
			    ao.addr16[6], 0, u),
			    ao.addr16[7], 0, u),
			    po, pn, u);
			break;
#endif /* INET */
		case AF_INET6:
			*pd->pcksum = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*pd->pcksum,
			    ao.addr16[0], an->addr16[0], u),
			    ao.addr16[1], an->addr16[1], u),
			    ao.addr16[2], an->addr16[2], u),
			    ao.addr16[3], an->addr16[3], u),
			    ao.addr16[4], an->addr16[4], u),
			    ao.addr16[5], an->addr16[5], u),
			    ao.addr16[6], an->addr16[6], u),
			    ao.addr16[7], an->addr16[7], u);

			*pd->pcksum = pf_proto_cksum_fixup(pd->m, *pd->pcksum, po, pn, u);
			break;
		default:
			unhandled_af(pd->naf);
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}

	/* Undo the inversion from above; a resulting 0 is stored as 0xffff. */
	if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA |
	    CSUM_DELAY_DATA_IPV6)) {
		*pd->pcksum = ~*pd->pcksum;
		if (! *pd->pcksum)
			*pd->pcksum = 0xffff;
	}
}

/*
 * Changes a u_int32_t.  Uses a void * so there are no align restrictions.
 * The value at `a' is replaced by `an' and checksum `*c' is fixed up for
 * both 16-bit halves of the old and new values; `u' is passed to
 * pf_cksum_fixup() as its udp argument.
 */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t ao;

	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));
	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
	    ao % 65536, an % 65536, u);
}

/*
 * Same as pf_change_a(), but goes through pf_proto_cksum_fixup(), so `*c'
 * is left unchanged when `m' has a delayed-checksum flag set.
 */
void
pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
{
	u_int32_t ao;

	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));

	*c = pf_proto_cksum_fixup(m,
	    pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp),
	    ao % 65536, an % 65536, udp);
}

#ifdef INET6
/*
 * Replace IPv6 address `*a' with `*an' and fix up checksum `*c' for each
 * of the eight 16-bit words.
 */
static void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr ao;

	pf_addrcpy(&ao, a, AF_INET6);
	pf_addrcpy(a, an, AF_INET6);

	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(*c,
	    ao.addr16[0], an->addr16[0], u),
	    ao.addr16[1], an->addr16[1], u),
	    ao.addr16[2], an->addr16[2], u),
	    ao.addr16[3], an->addr16[3], u),
	    ao.addr16[4], an->addr16[4], u),
	    ao.addr16[5], an->addr16[5], u),
	    ao.addr16[6], an->addr16[6], u),
	    ao.addr16[7], an->addr16[7], u);
}
#endif /* INET6 */

/*
 * Rewrite the addresses (and optionally a port) of a packet quoted inside
 * an ICMP message.
 *
 *   ia   inner address, replaced by `na'
 *   ip   inner port, replaced by `np' (skipped when NULL)
 *   oa   outer address, replaced by `na' (skipped when NULL)
 *   pc   inner protocol checksum (may be NULL)
 *   h2c  inner IP header checksum (dereferenced for AF_INET only)
 *   ic   ICMP checksum
 *   hc   outer IP header checksum (dereferenced for AF_INET with `oa' set)
 *
 * Each change to an inner field or inner checksum is also folded into
 * `*ic'.
 */
static void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr oia, ooa;

	pf_addrcpy(&oia, ia, af);
	if (oa)
		pf_addrcpy(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t oip = *ip;
		u_int32_t opc;

		/* opc is set and read only when pc != NULL. */
		if (pc != NULL)
			opc = *pc;
		*ip = np;
		if (pc != NULL)
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL)
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	pf_addrcpy(ia, na, af);
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		/* The changed inner header checksum is covered by *ic too. */
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], u),
		    oia.addr16[1], ia->addr16[1], u),
		    oia.addr16[2], ia->addr16[2], u),
		    oia.addr16[3], ia->addr16[3], u),
		    oia.addr16[4], ia->addr16[4], u),
		    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
#endif /* INET6 */
	}
	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
	if (oa) {
		pf_addrcpy(oa, na, af);
		switch (af) {
#ifdef INET
		case AF_INET:
			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
			    ooa.addr16[0], oa->addr16[0], 0),
			    ooa.addr16[1], oa->addr16[1], 0);
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ooa.addr16[0], oa->addr16[0], u),
			    ooa.addr16[1], oa->addr16[1], u),
			    ooa.addr16[2], oa->addr16[2], u),
			    ooa.addr16[3], oa->addr16[3], u),
			    ooa.addr16[4], oa->addr16[4], u),
			    ooa.addr16[5], oa->addr16[5], u),
			    ooa.addr16[6], oa->addr16[6], u),
			    ooa.addr16[7], oa->addr16[7], u);
			break;
#endif /* INET6 */
		}
	}
}

int
pf_translate_af(struct pf_pdesc *pd)
{
#if defined(INET) && defined(INET6)
	struct mbuf *mp;
	struct ip *ip4;
	struct ip6_hdr *ip6;
	struct icmp6_hdr *icmp;
	struct m_tag *mtag;
	struct pf_fragment_tag *ftag;
	int hlen;

	hlen = pd->naf == AF_INET ?
	    sizeof(*ip4) : sizeof(*ip6);

	/* trim the old header */
	m_adj(pd->m, pd->off);

	/* prepend a new one */
	M_PREPEND(pd->m, hlen, M_NOWAIT);
	if (pd->m == NULL)
		return (-1);

	/*
	 * Fill in the new network header from the descriptor and repoint
	 * pd->src, pd->dst and pd->off at it.  pd->off still holds the old
	 * header length while the length fields are computed.
	 */
	switch (pd->naf) {
	case AF_INET:
		ip4 = mtod(pd->m, struct ip *);
		bzero(ip4, hlen);
		ip4->ip_v = IPVERSION;
		ip4->ip_hl = hlen >> 2;
		ip4->ip_tos = pd->tos;
		ip4->ip_len = htons(hlen + (pd->tot_len - pd->off));
		ip_fillid(ip4, V_ip_random_id);
		ip4->ip_ttl = pd->ttl;
		ip4->ip_p = pd->proto;
		ip4->ip_src = pd->nsaddr.v4;
		ip4->ip_dst = pd->ndaddr.v4;
		pd->src = (struct pf_addr *)&ip4->ip_src;
		pd->dst = (struct pf_addr *)&ip4->ip_dst;
		pd->off = sizeof(struct ip);
		break;
	case AF_INET6:
		ip6 = mtod(pd->m, struct ip6_hdr *);
		bzero(ip6, hlen);
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
		ip6->ip6_plen = htons(pd->tot_len - pd->off);
		ip6->ip6_nxt = pd->proto;
		/* A zero or too large TTL is replaced by IPV6_DEFHLIM. */
		if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
			ip6->ip6_hlim = IPV6_DEFHLIM;
		else
			ip6->ip6_hlim = pd->ttl;
		ip6->ip6_src = pd->nsaddr.v6;
		ip6->ip6_dst = pd->ndaddr.v6;
		pd->src = (struct pf_addr *)&ip6->ip6_src;
		pd->dst = (struct pf_addr *)&ip6->ip6_dst;
		pd->off = sizeof(struct ip6_hdr);

		/*
		 * If we're dealing with a reassembled packet we need to adjust
		 * the header length from the IPv4 header size to IPv6 header
		 * size.
		 */
		mtag = m_tag_find(pd->m, PACKET_TAG_PF_REASSEMBLED, NULL);
		if (mtag) {
			ftag = (struct pf_fragment_tag *)(mtag + 1);
			ftag->ft_hdrlen = sizeof(*ip6);
			ftag->ft_maxlen -= sizeof(struct ip6_hdr) -
			    sizeof(struct ip) + sizeof(struct ip6_frag);
		}
		break;
	default:
		return (-1);
	}

	/* recalculate icmp/icmp6 checksums */
	if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
		int off;
		if ((mp = m_pulldown(pd->m, hlen, sizeof(*icmp), &off)) ==
		    NULL) {
			/* Do not leave a pointer to the chain in pd. */
			pd->m = NULL;
			return (-1);
		}
		icmp = (struct icmp6_hdr *)(mp->m_data + off);
		/* The checksum field must be zero while summing. */
		icmp->icmp6_cksum = 0;
		icmp->icmp6_cksum = pd->naf == AF_INET ?
		    in4_cksum(pd->m, 0, hlen, ntohs(ip4->ip_len) - hlen) :
		    in6_cksum(pd->m, IPPROTO_ICMPV6, hlen,
			ntohs(ip6->ip6_plen));
	}
#endif /* INET && INET6 */

	return (0);
}

/*
 * Replace the IP header of the packet quoted inside an ICMP error with a
 * header of the other address family.
 *
 *   m         chain holding the whole ICMP packet
 *   off       offset of the quoted (inner) IP header within 'm'
 *   pd        descriptor of the outer packet; tot_len is adjusted
 *   pd2       descriptor of the inner packet; off is adjusted
 *   src, dst  addresses written into the new inner header
 *   af, naf   current and new address family; they must differ and both
 *             must be AF_INET or AF_INET6
 *
 * Returns 0 on success and -1 on failure.  Built without both INET and
 * INET6 the function does nothing and returns 0.
 */
int
pf_change_icmp_af(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
    sa_family_t af, sa_family_t naf)
{
#if defined(INET) && defined(INET6)
	struct mbuf		*n = NULL;
	struct ip		*ip4;
	struct ip6_hdr		*ip6;
	int			 hlen, olen, mlen;

	if (af == naf || (af != AF_INET && af != AF_INET6) ||
	    (naf != AF_INET && naf != AF_INET6))
		return (-1);

	/* split the mbuf chain on the inner ip/ip6 header boundary */
	if ((n = m_split(m, off, M_NOWAIT)) == NULL)
		return (-1);

	/* old header */
	olen = pd2->off - off;
	/* new header */
	hlen = naf == AF_INET ?
	    sizeof(*ip4) : sizeof(*ip6);

	/* trim old header */
	m_adj(n, olen);

	/* prepend a new one */
	M_PREPEND(n, hlen, M_NOWAIT);
	if (n == NULL)
		return (-1);

	/* translate inner ip/ip6 header */
	switch (naf) {
	case AF_INET:
		ip4 = mtod(n, struct ip *);
		bzero(ip4, sizeof(*ip4));
		ip4->ip_v = IPVERSION;
		ip4->ip_hl = sizeof(*ip4) >> 2;
		ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - olen);
		ip_fillid(ip4, V_ip_random_id);
		ip4->ip_off = htons(IP_DF);
		ip4->ip_ttl = pd2->ttl;
		/* ICMPv6 has a different protocol number than ICMP. */
		if (pd2->proto == IPPROTO_ICMPV6)
			ip4->ip_p = IPPROTO_ICMP;
		else
			ip4->ip_p = pd2->proto;
		ip4->ip_src = src->v4;
		ip4->ip_dst = dst->v4;
		ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
		break;
	case AF_INET6:
		ip6 = mtod(n, struct ip6_hdr *);
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_plen = htons(pd2->tot_len - olen);
		if (pd2->proto == IPPROTO_ICMP)
			ip6->ip6_nxt = IPPROTO_ICMPV6;
		else
			ip6->ip6_nxt = pd2->proto;
		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
			ip6->ip6_hlim = IPV6_DEFHLIM;
		else
			ip6->ip6_hlim = pd2->ttl;
		ip6->ip6_src = src->v6;
		ip6->ip6_dst = dst->v6;
		break;
	default:
		unhandled_af(naf);
	}

	/* adjust payload offset and total packet length */
	pd2->off += hlen - olen;
	pd->tot_len += hlen - olen;

	/* merge modified inner packet with the original header */
	mlen = n->m_pkthdr.len;
	m_cat(m, n);
	m->m_pkthdr.len += mlen;
#endif /* INET && INET6 */

	return (0);
}

/* Byte offset of a field within the IPv4 / IPv6 header. */
#define PTR_IP(field)	(offsetof(struct ip, field))
#define PTR_IP6(field)	(offsetof(struct ip6_hdr, field))

/*
 * Translate an ICMP header in place between ICMPv4 and ICMPv6.
 *
 * 'af' is the address family being translated to: for AF_INET, 'arg'
 * points at a struct icmp6_hdr that is rewritten with ICMPv4 values; for
 * AF_INET6, 'arg' points at a struct icmp that is rewritten with ICMPv6
 * values.  Returns 0 on success and -1 for a type/code that has no
 * mapping.
 */
int
pf_translate_icmp_af(int af, void *arg)
{
#if defined(INET) && defined(INET6)
	struct icmp		*icmp4;
	struct icmp6_hdr	*icmp6;
	u_int32_t		 mtu;
	int32_t			 ptr = -1;
	u_int8_t		 type;
	u_int8_t		 code;

	switch (af) {
	case AF_INET:
		/* ICMPv6 -> ICMPv4 */
		icmp6 = arg;
		type = icmp6->icmp6_type;
		code = icmp6->icmp6_code;
		mtu = ntohl(icmp6->icmp6_mtu);

		switch (type) {
		case ICMP6_ECHO_REQUEST:
			type = ICMP_ECHO;
			break;
		case ICMP6_ECHO_REPLY:
			type = ICMP_ECHOREPLY;
			break;
		case ICMP6_DST_UNREACH:
			type = ICMP_UNREACH;
			switch (code) {
			case ICMP6_DST_UNREACH_NOROUTE:
			case ICMP6_DST_UNREACH_BEYONDSCOPE:
			case ICMP6_DST_UNREACH_ADDR:
				code = ICMP_UNREACH_HOST;
				break;
			case ICMP6_DST_UNREACH_ADMIN:
				code = ICMP_UNREACH_HOST_PROHIB;
				break;
			case ICMP6_DST_UNREACH_NOPORT:
				code = ICMP_UNREACH_PORT;
				break;
			default:
				return (-1);
			}
			break;
		case ICMP6_PACKET_TOO_BIG:
			type = ICMP_UNREACH;
			code = ICMP_UNREACH_NEEDFRAG;
			/*
			 * NOTE(review): 20 presumably is the size difference
			 * between the IPv6 and IPv4 headers -- confirm.
			 */
			mtu -= 20;
			break;
		case ICMP6_TIME_EXCEEDED:
			type = ICMP_TIMXCEED;
			break;
		case ICMP6_PARAM_PROB:
			switch (code) {
			case ICMP6_PARAMPROB_HEADER:
				type = ICMP_PARAMPROB;
				code = ICMP_PARAMPROB_ERRATPTR;
				ptr = ntohl(icmp6->icmp6_pptr);

				/*
				 * Map the offset of the offending IPv6 header
				 * field to the offset of the corresponding
				 * IPv4 header field.
				 */
				if (ptr == PTR_IP6(ip6_vfc))
					; /* preserve */
				else if (ptr == PTR_IP6(ip6_vfc) + 1)
					ptr = PTR_IP(ip_tos);
				else if (ptr == PTR_IP6(ip6_plen) ||
				    ptr == PTR_IP6(ip6_plen) + 1)
					ptr = PTR_IP(ip_len);
				else if (ptr == PTR_IP6(ip6_nxt))
					ptr = PTR_IP(ip_p);
				else if (ptr == PTR_IP6(ip6_hlim))
					ptr = PTR_IP(ip_ttl);
				else if (ptr >= PTR_IP6(ip6_src) &&
				    ptr < PTR_IP6(ip6_dst))
					ptr = PTR_IP(ip_src);
				else if (ptr >= PTR_IP6(ip6_dst) &&
				    ptr < sizeof(struct ip6_hdr))
					ptr = PTR_IP(ip_dst);
				else {
					return (-1);
				}
				break;
			case ICMP6_PARAMPROB_NEXTHEADER:
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_PROTOCOL;
				break;
			default:
				return (-1);
			}
			break;
		default:
			return (-1);
		}
		/* Store each changed field and patch the checksum for it. */
		if (icmp6->icmp6_type != type) {
			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
			    icmp6->icmp6_type, type, 0);
			icmp6->icmp6_type = type;
		}
		if (icmp6->icmp6_code != code) {
			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
			    icmp6->icmp6_code, code, 0);
			icmp6->icmp6_code = code;
		}
		if (icmp6->icmp6_mtu != htonl(mtu)) {
			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
			    htons(ntohl(icmp6->icmp6_mtu)), htons(mtu), 0);
			/* aligns well with a icmpv4 nextmtu */
			icmp6->icmp6_mtu = htonl(mtu);
		}
		if (ptr >= 0 && icmp6->icmp6_pptr != htonl(ptr)) {
			/*
			 * NOTE(review): the fixup folds htons(ptr) while the
			 * value stored below is htonl(ptr << 24) -- confirm
			 * that callers recompute the ICMP checksum afterwards.
			 */
			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
			    htons(ntohl(icmp6->icmp6_pptr)), htons(ptr), 0);
			/* icmpv4 pptr is a one most significant byte */
			icmp6->icmp6_pptr = htonl(ptr << 24);
		}
		break;
	case AF_INET6:
		/* ICMPv4 -> ICMPv6 */
		icmp4 = arg;
		type = icmp4->icmp_type;
		code = icmp4->icmp_code;
		mtu = ntohs(icmp4->icmp_nextmtu);

		switch (type) {
		case ICMP_ECHO:
			type = ICMP6_ECHO_REQUEST;
			break;
		case ICMP_ECHOREPLY:
			type = ICMP6_ECHO_REPLY;
			break;
		case ICMP_UNREACH:
			type = ICMP6_DST_UNREACH;
			switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_TOSNET:
			case ICMP_UNREACH_TOSHOST:
				code = ICMP6_DST_UNREACH_NOROUTE;
				break;
			case ICMP_UNREACH_PORT:
				code = ICMP6_DST_UNREACH_NOPORT;
				break;
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_FILTER_PROHIB:
			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
				code = ICMP6_DST_UNREACH_ADMIN;
				break;
			case ICMP_UNREACH_PROTOCOL:
				/* Becomes a parameter problem at ip6_nxt. */
				type = ICMP6_PARAM_PROB;
				code = ICMP6_PARAMPROB_NEXTHEADER;
				ptr = offsetof(struct ip6_hdr, ip6_nxt);
				break;
			case ICMP_UNREACH_NEEDFRAG:
				type = ICMP6_PACKET_TOO_BIG;
				code = 0;
				/* Inverse of the "mtu -= 20" done for AF_INET. */
				mtu += 20;
				break;
			default:
				return (-1);
			}
			break;
		case ICMP_TIMXCEED:
			type = ICMP6_TIME_EXCEEDED;
			break;
		case ICMP_PARAMPROB:
			type = ICMP6_PARAM_PROB;
			switch (code) {
			case ICMP_PARAMPROB_ERRATPTR:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			case ICMP_PARAMPROB_LENGTH:
				code = ICMP6_PARAMPROB_HEADER;
				break;
			default:
				return (-1);
			}

			/*
			 * Map the offset of the offending IPv4 header field
			 * to the offset of the corresponding IPv6 header
			 * field.
			 */
			ptr = icmp4->icmp_pptr;
			if (ptr == 0 || ptr == PTR_IP(ip_tos))
				; /* preserve */
			else if (ptr == PTR_IP(ip_len) ||
			    ptr == PTR_IP(ip_len) + 1)
				ptr = PTR_IP6(ip6_plen);
			else if (ptr == PTR_IP(ip_ttl))
				ptr = PTR_IP6(ip6_hlim);
			else if (ptr == PTR_IP(ip_p))
				ptr = PTR_IP6(ip6_nxt);
			else if (ptr >= PTR_IP(ip_src) && ptr < PTR_IP(ip_dst))
				ptr = PTR_IP6(ip6_src);
			else if (ptr >= PTR_IP(ip_dst) &&
			    ptr < sizeof(struct ip))
				ptr = PTR_IP6(ip6_dst);
			else {
				return (-1);
			}
			break;
		default:
			return (-1);
		}
		/* Store each changed field and patch the checksum for it. */
		if (icmp4->icmp_type != type) {
			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
			    icmp4->icmp_type, type, 0);
			icmp4->icmp_type = type;
		}
		if (icmp4->icmp_code != code) {
			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
			    icmp4->icmp_code, code, 0);
			icmp4->icmp_code = code;
		}
		if (icmp4->icmp_nextmtu != htons(mtu)) {
			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
			    icmp4->icmp_nextmtu, htons(mtu), 0);
			icmp4->icmp_nextmtu = htons(mtu);
		}
		/*
		 * NOTE(review): icmp_void is compared against the host-order
		 * 'ptr' but stored as htonl(ptr) -- confirm this is intended.
		 */
		if (ptr >= 0 && icmp4->icmp_void != ptr) {
			icmp4->icmp_cksum = pf_cksum_fixup(icmp4->icmp_cksum,
			    htons(icmp4->icmp_pptr), htons(ptr), 0);
			icmp4->icmp_void = htonl(ptr);
		}
		break;
	default:
		unhandled_af(af);
	}
#endif /* INET && INET6 */

	return (0);
}

/*
 * Need to modulate the sequence numbers in the TCP SACK option
 *
(credits to Krzysztof Pfaff for report and patch) 3952 */ 3953 static int 3954 pf_modulate_sack(struct pf_pdesc *pd, struct tcphdr *th, 3955 struct pf_state_peer *dst) 3956 { 3957 struct sackblk sack; 3958 int copyback = 0, i; 3959 int olen, optsoff; 3960 uint8_t opts[MAX_TCPOPTLEN], *opt, *eoh; 3961 3962 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3963 optsoff = pd->off + sizeof(struct tcphdr); 3964 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) 3965 if (olen < TCPOLEN_MINSACK || 3966 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) 3967 return (0); 3968 3969 eoh = opts + olen; 3970 opt = opts; 3971 while ((opt = pf_find_tcpopt(opt, opts, olen, 3972 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 3973 { 3974 size_t safelen = MIN(opt[1], (eoh - opt)); 3975 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 3976 size_t startoff = (opt + i) - opts; 3977 memcpy(&sack, &opt[i], sizeof(sack)); 3978 pf_patch_32(pd, &sack.start, 3979 htonl(ntohl(sack.start) - dst->seqdiff), 3980 PF_ALGNMNT(startoff)); 3981 pf_patch_32(pd, &sack.end, 3982 htonl(ntohl(sack.end) - dst->seqdiff), 3983 PF_ALGNMNT(startoff + sizeof(sack.start))); 3984 memcpy(&opt[i], &sack, sizeof(sack)); 3985 } 3986 copyback = 1; 3987 opt += opt[1]; 3988 } 3989 3990 if (copyback) 3991 m_copyback(pd->m, optsoff, olen, (caddr_t)opts); 3992 3993 return (copyback); 3994 } 3995 3996 struct mbuf * 3997 pf_build_tcp(const struct pf_krule *r, sa_family_t af, 3998 const struct pf_addr *saddr, const struct pf_addr *daddr, 3999 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 4000 u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, 4001 int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, u_int sack, 4002 int rtableid) 4003 { 4004 struct mbuf *m; 4005 int len, tlen; 4006 #ifdef INET 4007 struct ip *h = NULL; 4008 #endif /* INET */ 4009 #ifdef INET6 4010 struct ip6_hdr *h6 = NULL; 4011 #endif /* INET6 */ 4012 struct tcphdr *th; 4013 char *opt; 4014 struct pf_mtag 
*pf_mtag; 4015 4016 len = 0; 4017 th = NULL; 4018 4019 /* maximum segment size tcp option */ 4020 tlen = sizeof(struct tcphdr); 4021 if (mss) 4022 tlen += 4; 4023 if (sack) 4024 tlen += 2; 4025 4026 switch (af) { 4027 #ifdef INET 4028 case AF_INET: 4029 len = sizeof(struct ip) + tlen; 4030 break; 4031 #endif /* INET */ 4032 #ifdef INET6 4033 case AF_INET6: 4034 len = sizeof(struct ip6_hdr) + tlen; 4035 break; 4036 #endif /* INET6 */ 4037 default: 4038 unhandled_af(af); 4039 } 4040 4041 m = m_gethdr(M_NOWAIT, MT_DATA); 4042 if (m == NULL) 4043 return (NULL); 4044 4045 #ifdef MAC 4046 mac_netinet_firewall_send(m); 4047 #endif 4048 if ((pf_mtag = pf_get_mtag(m)) == NULL) { 4049 m_freem(m); 4050 return (NULL); 4051 } 4052 m->m_flags |= mbuf_flags; 4053 pf_mtag->tag = mtag_tag; 4054 pf_mtag->flags = mtag_flags; 4055 4056 if (rtableid >= 0) 4057 M_SETFIB(m, rtableid); 4058 4059 #ifdef ALTQ 4060 if (r != NULL && r->qid) { 4061 pf_mtag->qid = r->qid; 4062 4063 /* add hints for ecn */ 4064 pf_mtag->hdr = mtod(m, struct ip *); 4065 } 4066 #endif /* ALTQ */ 4067 m->m_data += max_linkhdr; 4068 m->m_pkthdr.len = m->m_len = len; 4069 /* The rest of the stack assumes a rcvif, so provide one. 4070 * This is a locally generated packet, so .. close enough. */ 4071 m->m_pkthdr.rcvif = V_loif; 4072 bzero(m->m_data, len); 4073 switch (af) { 4074 #ifdef INET 4075 case AF_INET: 4076 m->m_pkthdr.csum_flags |= CSUM_TCP; 4077 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 4078 4079 h = mtod(m, struct ip *); 4080 4081 h->ip_p = IPPROTO_TCP; 4082 h->ip_len = htons(tlen); 4083 h->ip_v = 4; 4084 h->ip_hl = sizeof(*h) >> 2; 4085 h->ip_tos = IPTOS_LOWDELAY; 4086 h->ip_len = htons(len); 4087 h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0); 4088 h->ip_ttl = ttl ? 
ttl : V_ip_defttl; 4089 h->ip_sum = 0; 4090 h->ip_src.s_addr = saddr->v4.s_addr; 4091 h->ip_dst.s_addr = daddr->v4.s_addr; 4092 4093 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 4094 th->th_sum = in_pseudo(h->ip_src.s_addr, h->ip_dst.s_addr, 4095 htons(len - sizeof(struct ip) + IPPROTO_TCP)); 4096 break; 4097 #endif /* INET */ 4098 #ifdef INET6 4099 case AF_INET6: 4100 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 4101 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 4102 4103 h6 = mtod(m, struct ip6_hdr *); 4104 4105 /* IP header fields included in the TCP checksum */ 4106 h6->ip6_nxt = IPPROTO_TCP; 4107 h6->ip6_plen = htons(tlen); 4108 h6->ip6_vfc |= IPV6_VERSION; 4109 h6->ip6_hlim = V_ip6_defhlim; 4110 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 4111 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 4112 4113 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 4114 th->th_sum = in6_cksum_pseudo(h6, len - sizeof(struct ip6_hdr), 4115 IPPROTO_TCP, 0); 4116 break; 4117 #endif /* INET6 */ 4118 } 4119 4120 /* TCP header */ 4121 th->th_sport = sport; 4122 th->th_dport = dport; 4123 th->th_seq = htonl(seq); 4124 th->th_ack = htonl(ack); 4125 th->th_off = tlen >> 2; 4126 tcp_set_flags(th, tcp_flags); 4127 th->th_win = htons(win); 4128 4129 opt = (char *)(th + 1); 4130 if (mss) { 4131 opt = (char *)(th + 1); 4132 opt[0] = TCPOPT_MAXSEG; 4133 opt[1] = 4; 4134 mss = htons(mss); 4135 memcpy((opt + 2), &mss, 2); 4136 opt += 4; 4137 } 4138 if (sack) { 4139 opt[0] = TCPOPT_SACK_PERMITTED; 4140 opt[1] = 2; 4141 opt += 2; 4142 } 4143 4144 return (m); 4145 } 4146 4147 static void 4148 pf_send_sctp_abort(sa_family_t af, struct pf_pdesc *pd, 4149 uint8_t ttl, int rtableid) 4150 { 4151 struct mbuf *m; 4152 #ifdef INET 4153 struct ip *h = NULL; 4154 #endif /* INET */ 4155 #ifdef INET6 4156 struct ip6_hdr *h6 = NULL; 4157 #endif /* INET6 */ 4158 struct sctphdr *hdr; 4159 struct sctp_chunkhdr *chunk; 4160 struct pf_send_entry *pfse; 4161 
int off = 0; 4162 4163 MPASS(af == pd->af); 4164 4165 m = m_gethdr(M_NOWAIT, MT_DATA); 4166 if (m == NULL) 4167 return; 4168 4169 m->m_data += max_linkhdr; 4170 m->m_flags |= M_SKIP_FIREWALL; 4171 /* The rest of the stack assumes a rcvif, so provide one. 4172 * This is a locally generated packet, so .. close enough. */ 4173 m->m_pkthdr.rcvif = V_loif; 4174 4175 /* IPv4|6 header */ 4176 switch (af) { 4177 #ifdef INET 4178 case AF_INET: 4179 bzero(m->m_data, sizeof(struct ip) + sizeof(*hdr) + sizeof(*chunk)); 4180 4181 h = mtod(m, struct ip *); 4182 4183 /* IP header fields included in the TCP checksum */ 4184 4185 h->ip_p = IPPROTO_SCTP; 4186 h->ip_len = htons(sizeof(*h) + sizeof(*hdr) + sizeof(*chunk)); 4187 h->ip_ttl = ttl ? ttl : V_ip_defttl; 4188 h->ip_src = pd->dst->v4; 4189 h->ip_dst = pd->src->v4; 4190 4191 off += sizeof(struct ip); 4192 break; 4193 #endif /* INET */ 4194 #ifdef INET6 4195 case AF_INET6: 4196 bzero(m->m_data, sizeof(struct ip6_hdr) + sizeof(*hdr) + sizeof(*chunk)); 4197 4198 h6 = mtod(m, struct ip6_hdr *); 4199 4200 /* IP header fields included in the TCP checksum */ 4201 h6->ip6_vfc |= IPV6_VERSION; 4202 h6->ip6_nxt = IPPROTO_SCTP; 4203 h6->ip6_plen = htons(sizeof(*h6) + sizeof(*hdr) + sizeof(*chunk)); 4204 h6->ip6_hlim = ttl ? ttl : V_ip6_defhlim; 4205 memcpy(&h6->ip6_src, &pd->dst->v6, sizeof(struct in6_addr)); 4206 memcpy(&h6->ip6_dst, &pd->src->v6, sizeof(struct in6_addr)); 4207 4208 off += sizeof(struct ip6_hdr); 4209 break; 4210 #endif /* INET6 */ 4211 default: 4212 unhandled_af(af); 4213 } 4214 4215 /* SCTP header */ 4216 hdr = mtodo(m, off); 4217 4218 hdr->src_port = pd->hdr.sctp.dest_port; 4219 hdr->dest_port = pd->hdr.sctp.src_port; 4220 hdr->v_tag = pd->sctp_initiate_tag; 4221 hdr->checksum = 0; 4222 4223 /* Abort chunk. 
*/ 4224 off += sizeof(struct sctphdr); 4225 chunk = mtodo(m, off); 4226 4227 chunk->chunk_type = SCTP_ABORT_ASSOCIATION; 4228 chunk->chunk_length = htons(sizeof(*chunk)); 4229 4230 /* SCTP checksum */ 4231 off += sizeof(*chunk); 4232 m->m_pkthdr.len = m->m_len = off; 4233 4234 pf_sctp_checksum(m, off - sizeof(*hdr) - sizeof(*chunk)); 4235 4236 if (rtableid >= 0) 4237 M_SETFIB(m, rtableid); 4238 4239 /* Allocate outgoing queue entry, mbuf and mbuf tag. */ 4240 pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); 4241 if (pfse == NULL) { 4242 m_freem(m); 4243 return; 4244 } 4245 4246 switch (af) { 4247 #ifdef INET 4248 case AF_INET: 4249 pfse->pfse_type = PFSE_IP; 4250 break; 4251 #endif /* INET */ 4252 #ifdef INET6 4253 case AF_INET6: 4254 pfse->pfse_type = PFSE_IP6; 4255 break; 4256 #endif /* INET6 */ 4257 } 4258 4259 pfse->pfse_m = m; 4260 pf_send(pfse); 4261 } 4262 4263 void 4264 pf_send_tcp(const struct pf_krule *r, sa_family_t af, 4265 const struct pf_addr *saddr, const struct pf_addr *daddr, 4266 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 4267 u_int8_t tcp_flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, 4268 int mbuf_flags, u_int16_t mtag_tag, u_int16_t mtag_flags, int rtableid) 4269 { 4270 struct pf_send_entry *pfse; 4271 struct mbuf *m; 4272 4273 m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack, tcp_flags, 4274 win, mss, ttl, mbuf_flags, mtag_tag, mtag_flags, 0, rtableid); 4275 if (m == NULL) 4276 return; 4277 4278 /* Allocate outgoing queue entry, mbuf and mbuf tag. 
*/ 4279 pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); 4280 if (pfse == NULL) { 4281 m_freem(m); 4282 return; 4283 } 4284 4285 switch (af) { 4286 #ifdef INET 4287 case AF_INET: 4288 pfse->pfse_type = PFSE_IP; 4289 break; 4290 #endif /* INET */ 4291 #ifdef INET6 4292 case AF_INET6: 4293 pfse->pfse_type = PFSE_IP6; 4294 break; 4295 #endif /* INET6 */ 4296 default: 4297 unhandled_af(af); 4298 } 4299 4300 pfse->pfse_m = m; 4301 pf_send(pfse); 4302 } 4303 4304 static void 4305 pf_undo_nat(struct pf_krule *nr, struct pf_pdesc *pd, uint16_t bip_sum) 4306 { 4307 /* undo NAT changes, if they have taken place */ 4308 if (nr != NULL) { 4309 pf_addrcpy(pd->src, &pd->osrc, pd->af); 4310 pf_addrcpy(pd->dst, &pd->odst, pd->af); 4311 if (pd->sport) 4312 *pd->sport = pd->osport; 4313 if (pd->dport) 4314 *pd->dport = pd->odport; 4315 if (pd->ip_sum) 4316 *pd->ip_sum = bip_sum; 4317 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any); 4318 } 4319 } 4320 4321 static void 4322 pf_return(struct pf_krule *r, struct pf_krule *nr, struct pf_pdesc *pd, 4323 struct tcphdr *th, u_int16_t bproto_sum, u_int16_t bip_sum, 4324 u_short *reason, int rtableid) 4325 { 4326 pf_undo_nat(nr, pd, bip_sum); 4327 4328 if (pd->proto == IPPROTO_TCP && 4329 ((r->rule_flag & PFRULE_RETURNRST) || 4330 (r->rule_flag & PFRULE_RETURN)) && 4331 !(tcp_get_flags(th) & TH_RST)) { 4332 u_int32_t ack = ntohl(th->th_seq) + pd->p_len; 4333 4334 if (pf_check_proto_cksum(pd->m, pd->off, pd->tot_len - pd->off, 4335 IPPROTO_TCP, pd->af)) 4336 REASON_SET(reason, PFRES_PROTCKSUM); 4337 else { 4338 if (tcp_get_flags(th) & TH_SYN) 4339 ack++; 4340 if (tcp_get_flags(th) & TH_FIN) 4341 ack++; 4342 pf_send_tcp(r, pd->af, pd->dst, 4343 pd->src, th->th_dport, th->th_sport, 4344 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, 4345 r->return_ttl, M_SKIP_FIREWALL, 0, 0, rtableid); 4346 } 4347 } else if (pd->proto == IPPROTO_SCTP && 4348 (r->rule_flag & PFRULE_RETURN)) { 4349 pf_send_sctp_abort(pd->af, pd, r->return_ttl, rtableid); 4350 } 
else if (pd->proto != IPPROTO_ICMP && pd->af == AF_INET && 4351 r->return_icmp) 4352 pf_send_icmp(pd->m, r->return_icmp >> 8, 4353 r->return_icmp & 255, 0, pd->af, r, rtableid); 4354 else if (pd->proto != IPPROTO_ICMPV6 && pd->af == AF_INET6 && 4355 r->return_icmp6) 4356 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 4357 r->return_icmp6 & 255, 0, pd->af, r, rtableid); 4358 } 4359 4360 static int 4361 pf_match_ieee8021q_pcp(u_int8_t prio, struct mbuf *m) 4362 { 4363 struct m_tag *mtag; 4364 u_int8_t mpcp; 4365 4366 mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); 4367 if (mtag == NULL) 4368 return (0); 4369 4370 if (prio == PF_PRIO_ZERO) 4371 prio = 0; 4372 4373 mpcp = *(uint8_t *)(mtag + 1); 4374 4375 return (mpcp == prio); 4376 } 4377 4378 static int 4379 pf_icmp_to_bandlim(uint8_t type) 4380 { 4381 switch (type) { 4382 case ICMP_ECHO: 4383 case ICMP_ECHOREPLY: 4384 return (BANDLIM_ICMP_ECHO); 4385 case ICMP_TSTAMP: 4386 case ICMP_TSTAMPREPLY: 4387 return (BANDLIM_ICMP_TSTAMP); 4388 case ICMP_UNREACH: 4389 default: 4390 return (BANDLIM_ICMP_UNREACH); 4391 } 4392 } 4393 4394 static void 4395 pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_kstate *s, 4396 struct pf_state_peer *src, struct pf_state_peer *dst) 4397 { 4398 /* 4399 * We are sending challenge ACK as a response to SYN packet, which 4400 * matches existing state (modulo TCP window check). Therefore packet 4401 * must be sent on behalf of destination. 4402 * 4403 * We expect sender to remain either silent, or send RST packet 4404 * so both, firewall and remote peer, can purge dead state from 4405 * memory. 
4406 */ 4407 pf_send_tcp(s->rule, pd->af, pd->dst, pd->src, 4408 pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo, 4409 src->seqlo, TH_ACK, 0, 0, s->rule->return_ttl, 0, 0, 0, 4410 s->rule->rtableid); 4411 } 4412 4413 static void 4414 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int mtu, 4415 sa_family_t af, struct pf_krule *r, int rtableid) 4416 { 4417 struct pf_send_entry *pfse; 4418 struct mbuf *m0; 4419 struct pf_mtag *pf_mtag; 4420 4421 /* ICMP packet rate limitation. */ 4422 switch (af) { 4423 #ifdef INET6 4424 case AF_INET6: 4425 if (icmp6_ratelimit(NULL, type, code)) 4426 return; 4427 break; 4428 #endif /* INET6 */ 4429 #ifdef INET 4430 case AF_INET: 4431 if (badport_bandlim(pf_icmp_to_bandlim(type)) != 0) 4432 return; 4433 break; 4434 #endif /* INET */ 4435 } 4436 4437 /* Allocate outgoing queue entry, mbuf and mbuf tag. */ 4438 pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT); 4439 if (pfse == NULL) 4440 return; 4441 4442 if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) { 4443 free(pfse, M_PFTEMP); 4444 return; 4445 } 4446 4447 if ((pf_mtag = pf_get_mtag(m0)) == NULL) { 4448 free(pfse, M_PFTEMP); 4449 return; 4450 } 4451 /* XXX: revisit */ 4452 m0->m_flags |= M_SKIP_FIREWALL; 4453 4454 if (rtableid >= 0) 4455 M_SETFIB(m0, rtableid); 4456 4457 #ifdef ALTQ 4458 if (r->qid) { 4459 pf_mtag->qid = r->qid; 4460 /* add hints for ecn */ 4461 pf_mtag->hdr = mtod(m0, struct ip *); 4462 } 4463 #endif /* ALTQ */ 4464 4465 switch (af) { 4466 #ifdef INET 4467 case AF_INET: 4468 pfse->pfse_type = PFSE_ICMP; 4469 break; 4470 #endif /* INET */ 4471 #ifdef INET6 4472 case AF_INET6: 4473 pfse->pfse_type = PFSE_ICMP6; 4474 break; 4475 #endif /* INET6 */ 4476 } 4477 pfse->pfse_m = m0; 4478 pfse->icmpopts.type = type; 4479 pfse->icmpopts.code = code; 4480 pfse->icmpopts.mtu = mtu; 4481 pf_send(pfse); 4482 } 4483 4484 /* 4485 * Return ((n = 0) == (a = b [with mask m])) 4486 * Note: n != 0 => returns (a != b [with mask m]) 4487 */ 4488 int 4489 
pf_match_addr(u_int8_t n, const struct pf_addr *a, const struct pf_addr *m, 4490 const struct pf_addr *b, sa_family_t af) 4491 { 4492 switch (af) { 4493 #ifdef INET 4494 case AF_INET: 4495 if (IN_ARE_MASKED_ADDR_EQUAL(a->v4, b->v4, m->v4)) 4496 return (n == 0); 4497 break; 4498 #endif /* INET */ 4499 #ifdef INET6 4500 case AF_INET6: 4501 if (IN6_ARE_MASKED_ADDR_EQUAL(&a->v6, &b->v6, &m->v6)) 4502 return (n == 0); 4503 break; 4504 #endif /* INET6 */ 4505 } 4506 4507 return (n != 0); 4508 } 4509 4510 /* 4511 * Return 1 if b <= a <= e, otherwise return 0. 4512 */ 4513 int 4514 pf_match_addr_range(const struct pf_addr *b, const struct pf_addr *e, 4515 const struct pf_addr *a, sa_family_t af) 4516 { 4517 switch (af) { 4518 #ifdef INET 4519 case AF_INET: 4520 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 4521 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 4522 return (0); 4523 break; 4524 #endif /* INET */ 4525 #ifdef INET6 4526 case AF_INET6: { 4527 int i; 4528 4529 /* check a >= b */ 4530 for (i = 0; i < 4; ++i) 4531 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 4532 break; 4533 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 4534 return (0); 4535 /* check a <= e */ 4536 for (i = 0; i < 4; ++i) 4537 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 4538 break; 4539 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 4540 return (0); 4541 break; 4542 } 4543 #endif /* INET6 */ 4544 } 4545 return (1); 4546 } 4547 4548 static int 4549 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 4550 { 4551 switch (op) { 4552 case PF_OP_IRG: 4553 return ((p > a1) && (p < a2)); 4554 case PF_OP_XRG: 4555 return ((p < a1) || (p > a2)); 4556 case PF_OP_RRG: 4557 return ((p >= a1) && (p <= a2)); 4558 case PF_OP_EQ: 4559 return (p == a1); 4560 case PF_OP_NE: 4561 return (p != a1); 4562 case PF_OP_LT: 4563 return (p < a1); 4564 case PF_OP_LE: 4565 return (p <= a1); 4566 case PF_OP_GT: 4567 return (p > a1); 4568 case PF_OP_GE: 4569 return (p >= a1); 4570 } 4571 return (0); 
/* never reached */ 4572 } 4573 4574 int 4575 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 4576 { 4577 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 4578 } 4579 4580 static int 4581 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 4582 { 4583 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE) 4584 return (0); 4585 return (pf_match(op, a1, a2, u)); 4586 } 4587 4588 static int 4589 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 4590 { 4591 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE) 4592 return (0); 4593 return (pf_match(op, a1, a2, g)); 4594 } 4595 4596 int 4597 pf_match_tag(struct mbuf *m, struct pf_krule *r, int *tag, int mtag) 4598 { 4599 if (*tag == -1) 4600 *tag = mtag; 4601 4602 return ((!r->match_tag_not && r->match_tag == *tag) || 4603 (r->match_tag_not && r->match_tag != *tag)); 4604 } 4605 4606 static int 4607 pf_match_rcvif(struct mbuf *m, struct pf_krule *r) 4608 { 4609 struct ifnet *ifp = m->m_pkthdr.rcvif; 4610 struct pfi_kkif *kif; 4611 4612 if (ifp == NULL) 4613 return (0); 4614 4615 kif = (struct pfi_kkif *)ifp->if_pf_kif; 4616 4617 if (kif == NULL) { 4618 DPFPRINTF(PF_DEBUG_URGENT, 4619 "%s: kif == NULL, @%d via %s", __func__, r->nr, 4620 r->rcv_ifname); 4621 return (0); 4622 } 4623 4624 return (pfi_kkif_match(r->rcv_kif, kif)); 4625 } 4626 4627 int 4628 pf_tag_packet(struct pf_pdesc *pd, int tag) 4629 { 4630 4631 KASSERT(tag > 0, ("%s: tag %d", __func__, tag)); 4632 4633 if (pd->pf_mtag == NULL && ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) 4634 return (ENOMEM); 4635 4636 pd->pf_mtag->tag = tag; 4637 4638 return (0); 4639 } 4640 4641 /* 4642 * XXX: We rely on malloc(9) returning pointer aligned addresses. 
*/
/*
 * Bit 0 of a stack frame's rule pointer doubles as a "matched" flag:
 * PF_ANCHOR_MATCH() tests it, PF_ANCHOR_RULE() masks it off to recover
 * the rule pointer and PF_ANCHOR_SET_MATCH() sets it.
 */
#define	PF_ANCHORSTACK_MATCH	0x00000001
#define	PF_ANCHORSTACK_MASK	(PF_ANCHORSTACK_MATCH)

#define	PF_ANCHOR_MATCH(f)	((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
#define	PF_ANCHOR_RULE(f)	(struct pf_krule *)			\
				((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
#define	PF_ANCHOR_SET_MATCH(f)	do { (f)->r = (void *) 			\
				((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH);  \
} while (0)

/*
 * Evaluate the ruleset(s) attached to anchor rule r via pf_match_rule().
 *
 * Returns PF_TEST_FAIL (after logging) when ctx->depth has already reached
 * PF_ANCHOR_STACK_MAX; otherwise ctx->depth is raised by one for the
 * duration of the call.  For a wildcard anchor every child ruleset is
 * evaluated in RB_FOREACH order until one returns PF_TEST_QUICK or
 * PF_TEST_FAIL; with no children the result is PF_TEST_OK.  For a plain
 * anchor the anchor's own ruleset is evaluated.
 */
enum pf_test_status
pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r)
{
	enum pf_test_status	rv;

	PF_RULES_RASSERT();

	if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
		printf("%s: anchor stack overflow on %s\n",
		    __func__, r->anchor->name);
		return (PF_TEST_FAIL);
	}

	ctx->depth++;

	if (r->anchor_wildcard) {
		struct pf_kanchor *child;
		rv = PF_TEST_OK;
		RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) {
			rv = pf_match_rule(ctx, &child->ruleset);
			if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
				/*
				 * we either hit a rule with quick action
				 * (more likely), or hit some runtime
				 * error (e.g. pool_get() failure).
				 */
				break;
			}
		}
	} else {
		rv = pf_match_rule(ctx, &r->anchor->ruleset);
		/*
		 * Unless errors occurred, stop iff any rule matched
		 * within quick anchors.
		 *
		 * NOTE(review): r->quick is compared against the enum
		 * constant PF_TEST_QUICK, which only works if the rule's
		 * quick flag and that constant share a value -- confirm.
		 */
		if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK &&
		    *ctx->am == r)
			rv = PF_TEST_QUICK;
	}

	ctx->depth--;

	return (rv);
}

/*
 * One level of the Ethernet anchor stack: the ruleset and anchor rule we
 * descended from, plus the child currently being visited when the anchor
 * is a wildcard (NULL otherwise).
 */
struct pf_keth_anchor_stackframe {
	struct pf_keth_ruleset	*rs;
	struct pf_keth_rule	*r;	/* XXX: + match bit */
	struct pf_keth_anchor	*child;
};

/* Same tagged-pointer helpers as PF_ANCHOR_*(), typed for Ethernet rules. */
#define PF_ETH_ANCHOR_MATCH(f)	((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
#define PF_ETH_ANCHOR_RULE(f)	(struct pf_keth_rule *)			\
				((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
#define PF_ETH_ANCHOR_SET_MATCH(f)	do { (f)->r = (void *) 		\
				((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH);  \
} while (0)

/*
 * Descend into the anchor referenced by Ethernet rule *r.
 *
 * Clears *match (when match is non-NULL).  If the stack is already
 * PF_ANCHOR_STACK_MAX deep the anchor is skipped: a message is printed and
 * *r advances to the next rule.  Otherwise a frame holding the current
 * *rs and *r is pushed, *a is set to the anchor rule when entering from
 * depth 0 (and a is non-NULL), and *rs / *r are redirected to the anchor's
 * ruleset (or its first child for a wildcard anchor) and that ruleset's
 * first active rule.  A wildcard anchor without children leaves *r NULL
 * with the frame still pushed.
 */
void
pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
    struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
    struct pf_keth_rule **a, int *match)
{
	struct pf_keth_anchor_stackframe	*f;

	NET_EPOCH_ASSERT();

	if (match)
		*match = 0;
	if (*depth >= PF_ANCHOR_STACK_MAX) {
		printf("%s: anchor stack overflow on %s\n",
		    __func__, (*r)->anchor->name);
		*r = TAILQ_NEXT(*r, entries);
		return;
	} else if (*depth == 0 && a != NULL)
		*a = *r;
	/* Push a frame remembering where to resume in the parent. */
	f = stack + (*depth)++;
	f->rs = *rs;
	f->r = *r;
	if ((*r)->anchor_wildcard) {
		struct pf_keth_anchor_node *parent = &(*r)->anchor->children;

		if ((f->child = RB_MIN(pf_keth_anchor_node, parent)) == NULL) {
			*r = NULL;
			return;
		}
		*rs = &f->child->ruleset;
	} else {
		f->child = NULL;
		*rs = &(*r)->anchor->ruleset;
	}
	*r = TAILQ_FIRST((*rs)->active.rules);
}

/*
 * Leave the current Ethernet anchor once its rules are exhausted.
 *
 * Loops until *r is non-NULL or the stack is empty.  Inside a wildcard
 * anchor the next child with at least one active rule is entered first;
 * only when no child is left is the frame popped and *rs / *r restored to
 * the rule following the anchor rule.  *a is cleared when the stack
 * becomes empty (and a is non-NULL).
 *
 * Returns the anchor rule's quick value from the most recently popped
 * frame that had its match bit set or was left with *match non-zero;
 * 0 if there was no such frame.
 */
int
pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
    struct pf_keth_ruleset **rs, struct pf_keth_rule **r,
    struct pf_keth_rule **a, int *match)
{
	struct pf_keth_anchor_stackframe	*f;
	struct pf_keth_rule *fr;
	int quick = 0;

	NET_EPOCH_ASSERT();

	do {
		if (*depth <= 0)
			break;
		f = stack + *depth - 1;
		/* Strip the match bit to get the real anchor rule. */
		fr = PF_ETH_ANCHOR_RULE(f);
		if (f->child != NULL) {
			/*
			 * This block traverses through
			 * a wildcard anchor.
			 */
			if (match != NULL && *match) {
				/*
				 * If any of "*" matched, then
				 * "foo/ *" matched, mark frame
				 * appropriately.
				 */
				PF_ETH_ANCHOR_SET_MATCH(f);
				*match = 0;
			}
			f->child = RB_NEXT(pf_keth_anchor_node,
			    &fr->anchor->children, f->child);
			if (f->child != NULL) {
				*rs = &f->child->ruleset;
				*r = TAILQ_FIRST((*rs)->active.rules);
				/* Empty child: loop again for the next one. */
				if (*r == NULL)
					continue;
				else
					break;
			}
		}
		/* Pop the frame and resume after the anchor rule. */
		(*depth)--;
		if (*depth == 0 && a != NULL)
			*a = NULL;
		*rs = f->rs;
		if (PF_ETH_ANCHOR_MATCH(f) || (match != NULL && *match))
			quick = fr->quick;
		*r = TAILQ_NEXT(fr, entries);
	} while (*r == NULL);

	return (quick);
}

/*
 * Combine a pool address with a source address under a mask: for every
 * 32-bit word, bits set in rmask are taken from raddr and the remaining
 * bits from saddr.  The result is written to naddr.  One word is
 * processed for AF_INET and four for AF_INET6; other families leave
 * naddr untouched.
 */
void
pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
		((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
		((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
		((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
		((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
		break;
#endif /* INET6 */
	}
}

/*
 * Add one to an address stored in network byte order, in place.
 * For AF_INET6 the carry ripples from addr32[3] up to addr32[0]; an
 * all-ones word needs no byte swapping to be recognised, hence the direct
 * comparison with 0xffffffff.  The topmost word wraps around on overflow.
 */
void
pf_addr_inc(struct pf_addr *addr, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (addr->addr32[3] == 0xffffffff) {
			addr->addr32[3] = 0;
			if (addr->addr32[2] == 0xffffffff) {
				addr->addr32[2] = 0;
				if (addr->addr32[1] == 0xffffffff) {
					addr->addr32[1] = 0;
					addr->addr32[0] =
					    htonl(ntohl(addr->addr32[0]) + 1);
				} else
					addr->addr32[1] =
					    htonl(ntohl(addr->addr32[1]) + 1);
			} else
				addr->addr32[2] =
				    htonl(ntohl(addr->addr32[2]) + 1);
		} else
			addr->addr32[3] =
			    htonl(ntohl(addr->addr32[3]) + 1);
		break;
#endif /* INET6 */
	}
}

/*
 * Merge the actions configured on rule r into the accumulated action set
 * a.  Flags and log bits are OR-ed in; scalar settings (queue ids, routing
 * table, TTL, MSS, dummynet pipes, priorities, allow_opts, max packet
 * size) overwrite the accumulated value only when the rule sets them, so
 * values from earlier rules survive otherwise.
 */
void
pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a)
{
	/*
	 * Modern rules use the same flags in rules as they do in states.
	 */
	a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID|
	    PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO));

	/*
	 * Old-style scrub rules have different flags which need to be translated.
	 */
	if (r->rule_flag & PFRULE_RANDOMID)
		a->flags |= PFSTATE_RANDOMID;
	if (r->scrub_flags & PFSTATE_SETTOS || r->rule_flag & PFRULE_SET_TOS ) {
		a->flags |= PFSTATE_SETTOS;
		a->set_tos = r->set_tos;
	}

	if (r->qid)
		a->qid = r->qid;
	if (r->pqid)
		a->pqid = r->pqid;
	if (r->rtableid >= 0)
		a->rtableid = r->rtableid;
	a->log |= r->log;
	if (r->min_ttl)
		a->min_ttl = r->min_ttl;
	if (r->max_mss)
		a->max_mss = r->max_mss;
	if (r->dnpipe)
		a->dnpipe = r->dnpipe;
	if (r->dnrpipe)
		a->dnrpipe = r->dnrpipe;
	/* The pipe/queue distinction follows the last rule naming a pipe. */
	if (r->dnpipe || r->dnrpipe) {
		if (r->free_flags & PFRULE_DN_IS_PIPE)
			a->flags |= PFSTATE_DN_IS_PIPE;
		else
			a->flags &= ~PFSTATE_DN_IS_PIPE;
	}
	if (r->scrub_flags & PFSTATE_SETPRIO) {
		a->set_prio[0] = r->set_prio[0];
		a->set_prio[1] = r->set_prio[1];
	}
	if (r->allow_opts)
		a->allow_opts = r->allow_opts;
	if (r->max_pkt_size)
		a->max_pkt_size = r->max_pkt_size;
}

/*
 * Find the PCB matching the TCP or UDP packet described by pd and record
 * the uid and gid of its credential in pd->lookup.
 *
 * pd->lookup.uid and pd->lookup.gid are reset to -1 first.  For any
 * direction other than PF_IN the source and destination address/port are
 * swapped before the lookup.  An exact lookup is tried first and repeated
 * with INPLOOKUP_WILDCARD if it finds nothing.
 *
 * Returns 1 when a PCB was found, -1 when the protocol is neither TCP nor
 * UDP or no PCB matched.
 */
int
pf_socket_lookup(struct pf_pdesc *pd)
{
	struct pf_addr		*saddr, *daddr;
	u_int16_t		 sport, dport;
	struct inpcbinfo	*pi;
	struct inpcb		*inp;

	pd->lookup.uid = -1;
	pd->lookup.gid = -1;

	switch (pd->proto) {
	case IPPROTO_TCP:
		sport = pd->hdr.tcp.th_sport;
		dport = pd->hdr.tcp.th_dport;
		pi = &V_tcbinfo;
		break;
	case IPPROTO_UDP:
		sport = pd->hdr.udp.uh_sport;
		dport = pd->hdr.udp.uh_dport;
		pi = &V_udbinfo;
		break;
	default:
		return (-1);
	}
	if (pd->dir == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		u_int16_t	p;

		p = sport;
		sport = dport;
		dport = p;
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->af) {
#ifdef INET
	case AF_INET:
		inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
		    dport, INPLOOKUP_RLOCKPCB, NULL, pd->m);
		if (inp == NULL) {
			inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
			   daddr->v4, dport, INPLOOKUP_WILDCARD |
			   INPLOOKUP_RLOCKPCB, NULL, pd->m);
			if (inp == NULL)
				return (-1);
		}
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
		    dport, INPLOOKUP_RLOCKPCB, NULL, pd->m);
		if (inp == NULL) {
			inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
			    &daddr->v6, dport, INPLOOKUP_WILDCARD |
			    INPLOOKUP_RLOCKPCB, NULL, pd->m);
			if (inp == NULL)
				return (-1);
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(pd->af);
	}
	/* The lookup returned the PCB read-locked; drop the lock once read. */
	INP_RLOCK_ASSERT(inp);
	pd->lookup.uid = inp->inp_cred->cr_uid;
	pd->lookup.gid = inp->inp_cred->cr_gid;
	INP_RUNLOCK(inp);

	return (1);
}

/*
 * Scan TCP options for the next option of the given type, starting at
 * opt inside the option area opts[0 .. hlen).  Returns a pointer to the
 * option, or NULL when none is found or min_typelen is below 2.
 *
 * post: r  => (r[0] == type /\ r[1] >= min_typelen >= 2  "validity"
 *                      /\ (eoh - r) >= min_typelen >= 2  "safety"  )
 *
 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen
 */
uint8_t*
pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type,
    u_int8_t min_typelen)
{
	uint8_t *eoh = opts + hlen;

	if (min_typelen < 2)
		return (NULL);

	while ((eoh - opt) >= min_typelen) {
		switch (*opt) {
		case TCPOPT_EOL:
			/* FALLTHROUGH - Workaround the failure of some
			 systems to NOP-pad their bzero'd option buffers,
			 producing spurious EOLs */
		case TCPOPT_NOP:
			opt++;
			continue;
		default:
		if (opt[0] == type &&
			    opt[1] >= min_typelen)
			return (opt);
		}

		opt += MAX(opt[1], 2); /* evade infinite loops */
	}

	return (NULL);
}

/*
 * Extract the window scale option from the TCP header described by pd.
 * Returns the shift count, capped at TCP_MAX_WINSHIFT, with
 * PF_WSCALE_FLAG OR-ed in.  Returns 0 when the option space is shorter
 * than TCPOLEN_WINDOW, pf_pull_hdr() fails, or no window scale option is
 * present.  If the option occurs more than once the last one wins.
 */
u_int8_t
pf_get_wscale(struct pf_pdesc *pd)
{
	int	 olen;
	uint8_t	 opts[MAX_TCPOPTLEN], *opt;
	uint8_t	 wscale = 0;

	/* Bytes of options = TCP header length minus the fixed header. */
	olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
	if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m,
	    pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
		return (0);

	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
		    TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) {
		wscale = opt[2];
		wscale = MIN(wscale, TCP_MAX_WINSHIFT);
		wscale |= PF_WSCALE_FLAG;

		opt += opt[1];
	}

	return (wscale);
}

/*
 * Extract the maximum segment size option from the TCP header described
 * by pd, in host byte order.  Returns V_tcp_mssdflt when no MSS option is
 * present and 0 when the option space is shorter than TCPOLEN_MAXSEG or
 * pf_pull_hdr() fails.  If the option occurs more than once the last one
 * wins.
 */
u_int16_t
pf_get_mss(struct pf_pdesc *pd)
{
	int		 olen;
	uint8_t		 opts[MAX_TCPOPTLEN], *opt;
	u_int16_t	 mss = V_tcp_mssdflt;

	olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
	if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m,
	    pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
		return (0);

	opt = opts;
	while ((opt = pf_find_tcpopt(opt, opts, olen,
	    TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
		/* memcpy: the option payload need not be 16-bit aligned. */
		memcpy(&mss, (opt + 2), 2);
		mss = ntohs(mss);
		opt += opt[1];
	}

	return (mss);
}

/*
 * Choose an MSS for a connection towards addr.  When a next hop is found
 * in routing table rtableid its MTU minus the IP and TCP header sizes is
 * the starting point, otherwise 0.  The value is then raised to at least
 * V_tcp_mssdflt, capped at the peer's offer, and finally kept at 64 or
 * above.
 */
static u_int16_t
pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
{
	struct nhop_object *nh;
#ifdef INET6
	struct in6_addr		dst6;
	uint32_t		scopeid;
#endif /* INET6 */
	int			 hlen = 0;
	uint16_t		 mss = 0;

	NET_EPOCH_ASSERT();

	switch (af) {
#ifdef INET
	case AF_INET:
		hlen = sizeof(struct ip);
		nh = fib4_lookup(rtableid, addr->v4, 0, 0, 0);
		if (nh != NULL)
			mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		in6_splitscope(&addr->v6, &dst6, &scopeid);
		nh = fib6_lookup(rtableid, &dst6, scopeid, 0, 0);
		if (nh != NULL)
			mss = nh->nh_mtu - hlen - sizeof(struct tcphdr);
		break;
#endif /* INET6 */
	}

	mss = max(V_tcp_mssdflt, mss);
	mss = min(mss, offer);
	mss = max(mss, 64);		/* sanity - at least max opt space */
	return (mss);
}

static u_int32_t 5108 pf_tcp_iss(struct pf_pdesc *pd) 5109 { 5110 SHA512_CTX ctx; 5111 union { 5112 uint8_t bytes[SHA512_DIGEST_LENGTH]; 5113 uint32_t words[1]; 5114 } digest; 5115 5116 if (V_pf_tcp_secret_init == 0) { 5117 arc4random_buf(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret)); 5118 SHA512_Init(&V_pf_tcp_secret_ctx); 5119 SHA512_Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret, 5120 sizeof(V_pf_tcp_secret)); 5121 V_pf_tcp_secret_init = 1; 5122 } 5123 5124 ctx = V_pf_tcp_secret_ctx; 5125 5126 SHA512_Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short)); 5127 SHA512_Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short)); 5128 switch (pd->af) { 5129 case AF_INET6: 5130 SHA512_Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 5131 SHA512_Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 5132 break; 5133 case AF_INET: 5134 SHA512_Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 5135 SHA512_Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 5136 break; 5137 } 5138 SHA512_Final(digest.bytes, &ctx); 5139 V_pf_tcp_iss_off += 4096; 5140 #define ISN_RANDOM_INCREMENT (4096 - 1) 5141 return (digest.words[0] + (arc4random() & ISN_RANDOM_INCREMENT) + 5142 V_pf_tcp_iss_off); 5143 #undef ISN_RANDOM_INCREMENT 5144 } 5145 5146 static bool 5147 pf_match_eth_addr(const uint8_t *a, const struct pf_keth_rule_addr *r) 5148 { 5149 bool match = true; 5150 5151 /* Always matches if not set */ 5152 if (! 
r->isset) 5153 return (!r->neg); 5154 5155 for (int i = 0; i < ETHER_ADDR_LEN; i++) { 5156 if ((a[i] & r->mask[i]) != (r->addr[i] & r->mask[i])) { 5157 match = false; 5158 break; 5159 } 5160 } 5161 5162 return (match ^ r->neg); 5163 } 5164 5165 static int 5166 pf_match_eth_tag(struct mbuf *m, struct pf_keth_rule *r, int *tag, int mtag) 5167 { 5168 if (*tag == -1) 5169 *tag = mtag; 5170 5171 return ((!r->match_tag_not && r->match_tag == *tag) || 5172 (r->match_tag_not && r->match_tag != *tag)); 5173 } 5174 5175 static void 5176 pf_bridge_to(struct ifnet *ifp, struct mbuf *m) 5177 { 5178 /* If we don't have the interface drop the packet. */ 5179 if (ifp == NULL) { 5180 m_freem(m); 5181 return; 5182 } 5183 5184 switch (ifp->if_type) { 5185 case IFT_ETHER: 5186 case IFT_XETHER: 5187 case IFT_L2VLAN: 5188 case IFT_BRIDGE: 5189 case IFT_IEEE8023ADLAG: 5190 break; 5191 default: 5192 m_freem(m); 5193 return; 5194 } 5195 5196 ifp->if_transmit(ifp, m); 5197 } 5198 5199 static int 5200 pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0) 5201 { 5202 #ifdef INET 5203 struct ip ip; 5204 #endif /* INET */ 5205 #ifdef INET6 5206 struct ip6_hdr ip6; 5207 #endif /* INET6 */ 5208 struct mbuf *m = *m0; 5209 struct ether_header *e; 5210 struct pf_keth_rule *r, *rm, *a = NULL; 5211 struct pf_keth_ruleset *ruleset = NULL; 5212 struct pf_mtag *mtag; 5213 struct pf_keth_ruleq *rules; 5214 struct pf_addr *src = NULL, *dst = NULL; 5215 struct pfi_kkif *bridge_to; 5216 sa_family_t af = 0; 5217 uint16_t proto; 5218 int asd = 0, match = 0; 5219 int tag = -1; 5220 uint8_t action; 5221 struct pf_keth_anchor_stackframe anchor_stack[PF_ANCHOR_STACK_MAX]; 5222 5223 MPASS(kif->pfik_ifp->if_vnet == curvnet); 5224 NET_EPOCH_ASSERT(); 5225 5226 PF_RULES_RLOCK_TRACKER; 5227 5228 SDT_PROBE3(pf, eth, test_rule, entry, dir, kif->pfik_ifp, m); 5229 5230 mtag = pf_find_mtag(m); 5231 if (mtag != NULL && mtag->flags & PF_MTAG_FLAG_DUMMYNET) { 5232 /* Dummynet re-injects packets after they've 5233 
* completed their delay. We've already 5234 * processed them, so pass unconditionally. */ 5235 5236 /* But only once. We may see the packet multiple times (e.g. 5237 * PFIL_IN/PFIL_OUT). */ 5238 pf_dummynet_flag_remove(m, mtag); 5239 5240 return (PF_PASS); 5241 } 5242 5243 if (__predict_false(m->m_len < sizeof(struct ether_header)) && 5244 (m = *m0 = m_pullup(*m0, sizeof(struct ether_header))) == NULL) { 5245 DPFPRINTF(PF_DEBUG_URGENT, 5246 "%s: m_len < sizeof(struct ether_header)" 5247 ", pullup failed", __func__); 5248 return (PF_DROP); 5249 } 5250 e = mtod(m, struct ether_header *); 5251 proto = ntohs(e->ether_type); 5252 5253 switch (proto) { 5254 #ifdef INET 5255 case ETHERTYPE_IP: { 5256 if (m_length(m, NULL) < (sizeof(struct ether_header) + 5257 sizeof(ip))) 5258 return (PF_DROP); 5259 5260 af = AF_INET; 5261 m_copydata(m, sizeof(struct ether_header), sizeof(ip), 5262 (caddr_t)&ip); 5263 src = (struct pf_addr *)&ip.ip_src; 5264 dst = (struct pf_addr *)&ip.ip_dst; 5265 break; 5266 } 5267 #endif /* INET */ 5268 #ifdef INET6 5269 case ETHERTYPE_IPV6: { 5270 if (m_length(m, NULL) < (sizeof(struct ether_header) + 5271 sizeof(ip6))) 5272 return (PF_DROP); 5273 5274 af = AF_INET6; 5275 m_copydata(m, sizeof(struct ether_header), sizeof(ip6), 5276 (caddr_t)&ip6); 5277 src = (struct pf_addr *)&ip6.ip6_src; 5278 dst = (struct pf_addr *)&ip6.ip6_dst; 5279 break; 5280 } 5281 #endif /* INET6 */ 5282 } 5283 5284 PF_RULES_RLOCK(); 5285 5286 ruleset = V_pf_keth; 5287 rules = atomic_load_ptr(&ruleset->active.rules); 5288 for (r = TAILQ_FIRST(rules), rm = NULL; r != NULL;) { 5289 counter_u64_add(r->evaluations, 1); 5290 SDT_PROBE2(pf, eth, test_rule, test, r->nr, r); 5291 5292 if (pfi_kkif_match(r->kif, kif) == r->ifnot) { 5293 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, 5294 "kif"); 5295 r = r->skip[PFE_SKIP_IFP].ptr; 5296 } 5297 else if (r->direction && r->direction != dir) { 5298 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, 5299 "dir"); 5300 r = 
r->skip[PFE_SKIP_DIR].ptr; 5301 } 5302 else if (r->proto && r->proto != proto) { 5303 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, 5304 "proto"); 5305 r = r->skip[PFE_SKIP_PROTO].ptr; 5306 } 5307 else if (! pf_match_eth_addr(e->ether_shost, &r->src)) { 5308 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, 5309 "src"); 5310 r = r->skip[PFE_SKIP_SRC_ADDR].ptr; 5311 } 5312 else if (! pf_match_eth_addr(e->ether_dhost, &r->dst)) { 5313 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, 5314 "dst"); 5315 r = r->skip[PFE_SKIP_DST_ADDR].ptr; 5316 } 5317 else if (src != NULL && PF_MISMATCHAW(&r->ipsrc.addr, src, af, 5318 r->ipsrc.neg, kif, M_GETFIB(m))) { 5319 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, 5320 "ip_src"); 5321 r = r->skip[PFE_SKIP_SRC_IP_ADDR].ptr; 5322 } 5323 else if (dst != NULL && PF_MISMATCHAW(&r->ipdst.addr, dst, af, 5324 r->ipdst.neg, kif, M_GETFIB(m))) { 5325 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, 5326 "ip_dst"); 5327 r = r->skip[PFE_SKIP_DST_IP_ADDR].ptr; 5328 } 5329 else if (r->match_tag && !pf_match_eth_tag(m, r, &tag, 5330 mtag ? mtag->tag : 0)) { 5331 SDT_PROBE3(pf, eth, test_rule, mismatch, r->nr, r, 5332 "match_tag"); 5333 r = TAILQ_NEXT(r, entries); 5334 } 5335 else { 5336 if (r->tag) 5337 tag = r->tag; 5338 if (r->anchor == NULL) { 5339 /* Rule matches */ 5340 rm = r; 5341 5342 SDT_PROBE2(pf, eth, test_rule, match, r->nr, r); 5343 5344 if (r->quick) 5345 break; 5346 5347 r = TAILQ_NEXT(r, entries); 5348 } else { 5349 pf_step_into_keth_anchor(anchor_stack, &asd, 5350 &ruleset, &r, &a, &match); 5351 } 5352 } 5353 if (r == NULL && pf_step_out_of_keth_anchor(anchor_stack, &asd, 5354 &ruleset, &r, &a, &match)) 5355 break; 5356 } 5357 5358 r = rm; 5359 5360 SDT_PROBE2(pf, eth, test_rule, final_match, (r != NULL ? r->nr : -1), r); 5361 5362 /* Default to pass. */ 5363 if (r == NULL) { 5364 PF_RULES_RUNLOCK(); 5365 return (PF_PASS); 5366 } 5367 5368 /* Execute action. 
*/ 5369 counter_u64_add(r->packets[dir == PF_OUT], 1); 5370 counter_u64_add(r->bytes[dir == PF_OUT], m_length(m, NULL)); 5371 pf_update_timestamp(r); 5372 5373 /* Shortcut. Don't tag if we're just going to drop anyway. */ 5374 if (r->action == PF_DROP) { 5375 PF_RULES_RUNLOCK(); 5376 return (PF_DROP); 5377 } 5378 5379 if (tag > 0) { 5380 if (mtag == NULL) 5381 mtag = pf_get_mtag(m); 5382 if (mtag == NULL) { 5383 PF_RULES_RUNLOCK(); 5384 counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1); 5385 return (PF_DROP); 5386 } 5387 mtag->tag = tag; 5388 } 5389 5390 if (r->qid != 0) { 5391 if (mtag == NULL) 5392 mtag = pf_get_mtag(m); 5393 if (mtag == NULL) { 5394 PF_RULES_RUNLOCK(); 5395 counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1); 5396 return (PF_DROP); 5397 } 5398 mtag->qid = r->qid; 5399 } 5400 5401 action = r->action; 5402 bridge_to = r->bridge_to; 5403 5404 /* Dummynet */ 5405 if (r->dnpipe) { 5406 struct ip_fw_args dnflow; 5407 5408 /* Drop packet if dummynet is not loaded. */ 5409 if (ip_dn_io_ptr == NULL) { 5410 PF_RULES_RUNLOCK(); 5411 m_freem(m); 5412 counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1); 5413 return (PF_DROP); 5414 } 5415 if (mtag == NULL) 5416 mtag = pf_get_mtag(m); 5417 if (mtag == NULL) { 5418 PF_RULES_RUNLOCK(); 5419 counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1); 5420 return (PF_DROP); 5421 } 5422 5423 bzero(&dnflow, sizeof(dnflow)); 5424 5425 /* We don't have port numbers here, so we set 0. That means 5426 * that we'll be somewhat limited in distinguishing flows (i.e. 5427 * only based on IP addresses, not based on port numbers), but 5428 * it's better than nothing. */ 5429 dnflow.f_id.dst_port = 0; 5430 dnflow.f_id.src_port = 0; 5431 dnflow.f_id.proto = 0; 5432 5433 dnflow.rule.info = r->dnpipe; 5434 dnflow.rule.info |= IPFW_IS_DUMMYNET; 5435 if (r->dnflags & PFRULE_DN_IS_PIPE) 5436 dnflow.rule.info |= IPFW_IS_PIPE; 5437 5438 dnflow.f_id.extra = dnflow.rule.info; 5439 5440 dnflow.flags = dir == PF_IN ? 
IPFW_ARGS_IN : IPFW_ARGS_OUT; 5441 dnflow.flags |= IPFW_ARGS_ETHER; 5442 dnflow.ifp = kif->pfik_ifp; 5443 5444 switch (af) { 5445 case AF_INET: 5446 dnflow.f_id.addr_type = 4; 5447 dnflow.f_id.src_ip = src->v4.s_addr; 5448 dnflow.f_id.dst_ip = dst->v4.s_addr; 5449 break; 5450 case AF_INET6: 5451 dnflow.flags |= IPFW_ARGS_IP6; 5452 dnflow.f_id.addr_type = 6; 5453 dnflow.f_id.src_ip6 = src->v6; 5454 dnflow.f_id.dst_ip6 = dst->v6; 5455 break; 5456 } 5457 5458 PF_RULES_RUNLOCK(); 5459 5460 mtag->flags |= PF_MTAG_FLAG_DUMMYNET; 5461 ip_dn_io_ptr(m0, &dnflow); 5462 if (*m0 != NULL) 5463 pf_dummynet_flag_remove(m, mtag); 5464 } else { 5465 PF_RULES_RUNLOCK(); 5466 } 5467 5468 if (action == PF_PASS && bridge_to) { 5469 pf_bridge_to(bridge_to->pfik_ifp, *m0); 5470 *m0 = NULL; /* We've eaten the packet. */ 5471 } 5472 5473 return (action); 5474 } 5475 5476 #define PF_TEST_ATTRIB(t, a) \ 5477 if (t) { \ 5478 r = a; \ 5479 continue; \ 5480 } else do { \ 5481 } while (0) 5482 5483 static __inline u_short 5484 pf_rule_apply_nat(struct pf_test_ctx *ctx, struct pf_krule *r) 5485 { 5486 struct pf_pdesc *pd = ctx->pd; 5487 u_short transerror; 5488 u_int8_t nat_action; 5489 5490 if (r->rule_flag & PFRULE_AFTO) { 5491 /* Don't translate if there was an old style NAT rule */ 5492 if (ctx->nr != NULL) 5493 return (PFRES_TRANSLATE); 5494 5495 /* pass af-to rules, unsupported on match rules */ 5496 KASSERT(r->action != PF_MATCH, ("%s: af-to on match rule", __func__)); 5497 /* XXX I can imagine scenarios where we have both NAT and RDR source tracking */ 5498 ctx->nat_pool = &(r->nat); 5499 ctx->nr = r; 5500 pd->naf = r->naf; 5501 if (pf_get_transaddr_af(ctx->nr, pd) == -1) { 5502 return (PFRES_TRANSLATE); 5503 } 5504 return (PFRES_MATCH); 5505 } else if (r->rdr.cur || r->nat.cur) { 5506 /* Don't translate if there was an old style NAT rule */ 5507 if (ctx->nr != NULL) 5508 return (PFRES_TRANSLATE); 5509 5510 /* match/pass nat-to/rdr-to rules */ 5511 ctx->nr = r; 5512 if (r->nat.cur) { 5513 
nat_action = PF_NAT; 5514 ctx->nat_pool = &(r->nat); 5515 } else { 5516 nat_action = PF_RDR; 5517 ctx->nat_pool = &(r->rdr); 5518 } 5519 5520 transerror = pf_get_transaddr(ctx, ctx->nr, 5521 nat_action, ctx->nat_pool); 5522 if (transerror == PFRES_MATCH) { 5523 ctx->rewrite += pf_translate_compat(ctx); 5524 return(PFRES_MATCH); 5525 } 5526 return (transerror); 5527 } 5528 5529 return (PFRES_MAX); 5530 } 5531 5532 enum pf_test_status 5533 pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) 5534 { 5535 struct pf_krule_item *ri; 5536 struct pf_krule *r; 5537 struct pf_krule *save_a; 5538 struct pf_kruleset *save_aruleset; 5539 struct pf_pdesc *pd = ctx->pd; 5540 u_short transerror; 5541 5542 r = TAILQ_FIRST(ruleset->rules[PF_RULESET_FILTER].active.ptr); 5543 while (r != NULL) { 5544 if (ctx->pd->related_rule) { 5545 *ctx->rm = ctx->pd->related_rule; 5546 break; 5547 } 5548 pf_counter_u64_add(&r->evaluations, 1); 5549 PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot, 5550 r->skip[PF_SKIP_IFP]); 5551 PF_TEST_ATTRIB(r->direction && r->direction != pd->dir, 5552 r->skip[PF_SKIP_DIR]); 5553 PF_TEST_ATTRIB(r->af && r->af != pd->af, 5554 r->skip[PF_SKIP_AF]); 5555 PF_TEST_ATTRIB(r->proto && r->proto != pd->proto, 5556 r->skip[PF_SKIP_PROTO]); 5557 PF_TEST_ATTRIB(PF_MISMATCHAW(&r->src.addr, &pd->nsaddr, pd->naf, 5558 r->src.neg, pd->kif, M_GETFIB(pd->m)), 5559 r->skip[PF_SKIP_SRC_ADDR]); 5560 PF_TEST_ATTRIB(PF_MISMATCHAW(&r->dst.addr, &pd->ndaddr, pd->af, 5561 r->dst.neg, NULL, M_GETFIB(pd->m)), 5562 r->skip[PF_SKIP_DST_ADDR]); 5563 switch (pd->virtual_proto) { 5564 case PF_VPROTO_FRAGMENT: 5565 /* tcp/udp only. port_op always 0 in other cases */ 5566 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 5567 TAILQ_NEXT(r, entries)); 5568 PF_TEST_ATTRIB((pd->proto == IPPROTO_TCP && r->flagset), 5569 TAILQ_NEXT(r, entries)); 5570 /* icmp only. 
type/code always 0 in other cases */ 5571 PF_TEST_ATTRIB((r->type || r->code), 5572 TAILQ_NEXT(r, entries)); 5573 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 5574 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 5575 TAILQ_NEXT(r, entries)); 5576 break; 5577 5578 case IPPROTO_TCP: 5579 PF_TEST_ATTRIB((r->flagset & tcp_get_flags(ctx->th)) 5580 != r->flags, 5581 TAILQ_NEXT(r, entries)); 5582 /* FALLTHROUGH */ 5583 case IPPROTO_SCTP: 5584 case IPPROTO_UDP: 5585 /* tcp/udp only. port_op always 0 in other cases */ 5586 PF_TEST_ATTRIB(r->src.port_op && !pf_match_port(r->src.port_op, 5587 r->src.port[0], r->src.port[1], pd->nsport), 5588 r->skip[PF_SKIP_SRC_PORT]); 5589 /* tcp/udp only. port_op always 0 in other cases */ 5590 PF_TEST_ATTRIB(r->dst.port_op && !pf_match_port(r->dst.port_op, 5591 r->dst.port[0], r->dst.port[1], pd->ndport), 5592 r->skip[PF_SKIP_DST_PORT]); 5593 /* tcp/udp only. uid.op always 0 in other cases */ 5594 PF_TEST_ATTRIB(r->uid.op && (pd->lookup.done || (pd->lookup.done = 5595 pf_socket_lookup(pd), 1)) && 5596 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], 5597 pd->lookup.uid), 5598 TAILQ_NEXT(r, entries)); 5599 /* tcp/udp only. gid.op always 0 in other cases */ 5600 PF_TEST_ATTRIB(r->gid.op && (pd->lookup.done || (pd->lookup.done = 5601 pf_socket_lookup(pd), 1)) && 5602 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], 5603 pd->lookup.gid), 5604 TAILQ_NEXT(r, entries)); 5605 break; 5606 5607 case IPPROTO_ICMP: 5608 case IPPROTO_ICMPV6: 5609 /* icmp only. type always 0 in other cases */ 5610 PF_TEST_ATTRIB(r->type && r->type != ctx->icmptype + 1, 5611 TAILQ_NEXT(r, entries)); 5612 /* icmp only. 
code always 0 in other cases */ 5613 PF_TEST_ATTRIB(r->code && r->code != ctx->icmpcode + 1, 5614 TAILQ_NEXT(r, entries)); 5615 break; 5616 5617 default: 5618 break; 5619 } /* protocol-independent attributes: tos, 802.1q priority, probability, tag, receive interface, fragment flag, OS fingerprint */ 5620 PF_TEST_ATTRIB(r->tos && !(r->tos == pd->tos), 5621 TAILQ_NEXT(r, entries)); 5622 PF_TEST_ATTRIB(r->prio && 5623 !pf_match_ieee8021q_pcp(r->prio, pd->m), 5624 TAILQ_NEXT(r, entries)); /* a fresh arc4random() value is drawn for every evaluation of a rule with r->prob set */ 5625 PF_TEST_ATTRIB(r->prob && 5626 r->prob <= arc4random(), 5627 TAILQ_NEXT(r, entries)); 5628 PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, 5629 &ctx->tag, pd->pf_mtag ? pd->pf_mtag->tag : 0), 5630 TAILQ_NEXT(r, entries)); 5631 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) == 5632 r->rcvifnot), 5633 TAILQ_NEXT(r, entries)); 5634 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT && 5635 pd->virtual_proto != PF_VPROTO_FRAGMENT), 5636 TAILQ_NEXT(r, entries)); /* OS fingerprints are only evaluated for TCP; a rule with a fingerprint never matches other protocols */ 5637 PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && 5638 (pd->virtual_proto != IPPROTO_TCP || !pf_osfp_match( 5639 pf_osfp_fingerprint(pd, ctx->th), 5640 r->os_fingerprint)), 5641 TAILQ_NEXT(r, entries)); 5642 /* must be last! NOTE(review): presumably because pf_check_threshold() updates the rate counter as a side effect, so it should only run once every other attribute has matched -- confirm */ 5643 if (r->pktrate.limit) { 5644 PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)), 5645 TAILQ_NEXT(r, entries)); 5646 } 5647 /* FALLTHROUGH: every attribute test passed, the rule is handled as a match below */ 5648 if (r->tag) 5649 ctx->tag = r->tag; 5650 if (r->anchor == NULL) { 5651 if (r->action == PF_MATCH) { 5652 /* 5653 * Apply translations before increasing counters, 5654 * in case it fails.
5655 */ 5656 transerror = pf_rule_apply_nat(ctx, r); 5657 switch (transerror) { 5658 case PFRES_MATCH: 5659 /* Translation action found in rule and applied successfully */ 5660 case PFRES_MAX: 5661 /* No translation action found in rule */ 5662 break; 5663 default: 5664 /* Translation action found in rule but failed to apply */ 5665 REASON_SET(&ctx->reason, transerror); 5666 return (PF_TEST_FAIL); 5667 } 5668 ri = malloc(sizeof(struct pf_krule_item), M_PF_RULE_ITEM, M_NOWAIT | M_ZERO); 5669 if (ri == NULL) { 5670 REASON_SET(&ctx->reason, PFRES_MEMORY); 5671 return (PF_TEST_FAIL); 5672 } 5673 ri->r = r; 5674 SLIST_INSERT_HEAD(&ctx->rules, ri, entry); 5675 pf_counter_u64_critical_enter(); 5676 pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1); 5677 pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len); 5678 pf_counter_u64_critical_exit(); 5679 pf_rule_to_actions(r, &pd->act); 5680 if (r->log) 5681 PFLOG_PACKET(r->action, PFRES_MATCH, r, 5682 ctx->a, ruleset, pd, 1, NULL); 5683 } else { 5684 /* 5685 * found matching r 5686 */ 5687 *ctx->rm = r; 5688 /* 5689 * anchor, with ruleset, where r belongs to 5690 */ 5691 *ctx->am = ctx->a; 5692 /* 5693 * ruleset where r belongs to 5694 */ 5695 *ctx->rsm = ruleset; 5696 /* 5697 * ruleset, where anchor belongs to. 5698 */ 5699 ctx->arsm = ctx->aruleset; 5700 } 5701 if (pd->act.log & PF_LOG_MATCHES) 5702 pf_log_matches(pd, r, ctx->a, ruleset, &ctx->rules); 5703 if (r->quick) { 5704 ctx->test_status = PF_TEST_QUICK; 5705 break; 5706 } 5707 } else { 5708 save_a = ctx->a; 5709 save_aruleset = ctx->aruleset; 5710 5711 ctx->a = r; /* remember anchor */ 5712 ctx->aruleset = ruleset; /* and its ruleset */ 5713 if (ctx->a->quick) 5714 ctx->test_status = PF_TEST_QUICK; 5715 /* 5716 * Note: we don't need to restore if we are not going 5717 * to continue with ruleset evaluation. 
5718 */ 5719 if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) { 5720 break; 5721 } 5722 ctx->a = save_a; 5723 ctx->aruleset = save_aruleset; 5724 } 5725 r = TAILQ_NEXT(r, entries); 5726 } 5727 5728 return (ctx->test_status); 5729 } 5730 5731 static int 5732 pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, 5733 struct pf_pdesc *pd, struct pf_krule **am, 5734 struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp) 5735 { 5736 struct pf_krule *r = NULL; 5737 struct pf_kruleset *ruleset = NULL; 5738 struct pf_krule_item *ri; 5739 struct pf_test_ctx ctx; 5740 u_short transerror; 5741 int action = PF_PASS; 5742 u_int16_t bproto_sum = 0, bip_sum = 0; 5743 enum pf_test_status rv; 5744 5745 PF_RULES_RASSERT(); 5746 5747 bzero(&ctx, sizeof(ctx)); 5748 ctx.tag = -1; 5749 ctx.pd = pd; 5750 ctx.rm = rm; 5751 ctx.am = am; 5752 ctx.rsm = rsm; 5753 ctx.th = &pd->hdr.tcp; 5754 ctx.reason = *reason; 5755 SLIST_INIT(&ctx.rules); 5756 5757 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 5758 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 5759 5760 if (inp != NULL) { 5761 INP_LOCK_ASSERT(inp); 5762 pd->lookup.uid = inp->inp_cred->cr_uid; 5763 pd->lookup.gid = inp->inp_cred->cr_gid; 5764 pd->lookup.done = 1; 5765 } 5766 5767 if (pd->ip_sum) 5768 bip_sum = *pd->ip_sum; 5769 5770 switch (pd->virtual_proto) { 5771 case IPPROTO_TCP: 5772 bproto_sum = ctx.th->th_sum; 5773 pd->nsport = ctx.th->th_sport; 5774 pd->ndport = ctx.th->th_dport; 5775 break; 5776 case IPPROTO_UDP: 5777 bproto_sum = pd->hdr.udp.uh_sum; 5778 pd->nsport = pd->hdr.udp.uh_sport; 5779 pd->ndport = pd->hdr.udp.uh_dport; 5780 break; 5781 case IPPROTO_SCTP: 5782 pd->nsport = pd->hdr.sctp.src_port; 5783 pd->ndport = pd->hdr.sctp.dest_port; 5784 break; 5785 #ifdef INET 5786 case IPPROTO_ICMP: 5787 MPASS(pd->af == AF_INET); 5788 ctx.icmptype = pd->hdr.icmp.icmp_type; 5789 ctx.icmpcode = pd->hdr.icmp.icmp_code; 5790 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 5791 &ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type); 5792 
if (ctx.icmp_dir == PF_IN) { 5793 pd->nsport = ctx.virtual_id; 5794 pd->ndport = ctx.virtual_type; 5795 } else { 5796 pd->nsport = ctx.virtual_type; 5797 pd->ndport = ctx.virtual_id; 5798 } 5799 break; 5800 #endif /* INET */ 5801 #ifdef INET6 5802 case IPPROTO_ICMPV6: 5803 MPASS(pd->af == AF_INET6); 5804 ctx.icmptype = pd->hdr.icmp6.icmp6_type; 5805 ctx.icmpcode = pd->hdr.icmp6.icmp6_code; 5806 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 5807 &ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type); 5808 if (ctx.icmp_dir == PF_IN) { 5809 pd->nsport = ctx.virtual_id; 5810 pd->ndport = ctx.virtual_type; 5811 } else { 5812 pd->nsport = ctx.virtual_type; 5813 pd->ndport = ctx.virtual_id; 5814 } 5815 5816 break; 5817 #endif /* INET6 */ 5818 default: 5819 pd->nsport = pd->ndport = 0; 5820 break; 5821 } 5822 pd->osport = pd->nsport; 5823 pd->odport = pd->ndport; 5824 5825 /* check packet for BINAT/NAT/RDR */ 5826 transerror = pf_get_translation(&ctx); 5827 switch (transerror) { 5828 default: 5829 /* A translation error occurred. */ 5830 REASON_SET(&ctx.reason, transerror); 5831 goto cleanup; 5832 case PFRES_MAX: 5833 /* No match. */ 5834 break; 5835 case PFRES_MATCH: 5836 KASSERT(ctx.sk != NULL, ("%s: null sk", __func__)); 5837 KASSERT(ctx.nk != NULL, ("%s: null nk", __func__)); 5838 if (ctx.nr->log) { 5839 PFLOG_PACKET(ctx.nr->action, PFRES_MATCH, ctx.nr, ctx.a, 5840 ruleset, pd, 1, NULL); 5841 } 5842 5843 ctx.rewrite += pf_translate_compat(&ctx); 5844 ctx.nat_pool = &(ctx.nr->rdr); 5845 } 5846 5847 ruleset = &pf_main_ruleset; 5848 rv = pf_match_rule(&ctx, ruleset); 5849 if (rv == PF_TEST_FAIL) { 5850 /* 5851 * Reason has been set in pf_match_rule() already. 
5852 */ 5853 goto cleanup; 5854 } 5855 5856 r = *ctx.rm; /* matching rule */ 5857 ctx.a = *ctx.am; /* rule that defines an anchor containing 'r' */ 5858 ruleset = *ctx.rsm; /* ruleset of the anchor defined by the rule 'a' */ 5859 ctx.aruleset = ctx.arsm; /* ruleset of the 'a' rule itself */ 5860 5861 REASON_SET(&ctx.reason, PFRES_MATCH); 5862 5863 /* apply actions for last matching pass/block rule */ 5864 pf_rule_to_actions(r, &pd->act); 5865 transerror = pf_rule_apply_nat(&ctx, r); 5866 switch (transerror) { 5867 case PFRES_MATCH: 5868 /* Translation action found in rule and applied successfully */ 5869 case PFRES_MAX: 5870 /* No translation action found in rule */ 5871 break; 5872 default: 5873 /* Translation action found in rule but failed to apply */ 5874 REASON_SET(&ctx.reason, transerror); 5875 goto cleanup; 5876 } 5877 5878 if (r->log) { 5879 if (ctx.rewrite) 5880 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any); 5881 PFLOG_PACKET(r->action, ctx.reason, r, ctx.a, ruleset, pd, 1, NULL); 5882 } 5883 if (pd->act.log & PF_LOG_MATCHES) 5884 pf_log_matches(pd, r, ctx.a, ruleset, &ctx.rules); 5885 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 5886 (r->action == PF_DROP) && 5887 ((r->rule_flag & PFRULE_RETURNRST) || 5888 (r->rule_flag & PFRULE_RETURNICMP) || 5889 (r->rule_flag & PFRULE_RETURN))) { 5890 pf_return(r, ctx.nr, pd, ctx.th, bproto_sum, 5891 bip_sum, &ctx.reason, r->rtableid); 5892 } 5893 5894 if (r->action == PF_DROP) 5895 goto cleanup; 5896 5897 if (ctx.tag > 0 && pf_tag_packet(pd, ctx.tag)) { 5898 REASON_SET(&ctx.reason, PFRES_MEMORY); 5899 goto cleanup; 5900 } 5901 if (pd->act.rtableid >= 0) 5902 M_SETFIB(pd->m, pd->act.rtableid); 5903 5904 if (r->rt) { 5905 /* 5906 * Set act.rt here instead of in pf_rule_to_actions() because 5907 * it is applied only from the last pass rule. 
5908 */ 5909 pd->act.rt = r->rt; 5910 if ((transerror = pf_map_addr_sn(pd->af, r, pd->src, 5911 &pd->act.rt_addr, &pd->act.rt_kif, NULL, &(r->route), 5912 PF_SN_ROUTE)) != PFRES_MATCH) { 5913 REASON_SET(&ctx.reason, transerror); 5914 goto cleanup; 5915 } 5916 } 5917 5918 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 5919 (!ctx.state_icmp && (r->keep_state || ctx.nr != NULL || 5920 (pd->flags & PFDESC_TCP_NORM)))) { 5921 bool nat64; 5922 5923 action = pf_create_state(r, &ctx, sm, bproto_sum, bip_sum); 5924 ctx.sk = ctx.nk = NULL; 5925 if (action != PF_PASS) { 5926 pf_udp_mapping_release(ctx.udp_mapping); 5927 if (r->log || (ctx.nr != NULL && ctx.nr->log) || 5928 ctx.reason == PFRES_MEMORY) 5929 pd->act.log |= PF_LOG_FORCE; 5930 if (action == PF_DROP && 5931 (r->rule_flag & PFRULE_RETURN)) 5932 pf_return(r, ctx.nr, pd, ctx.th, 5933 bproto_sum, bip_sum, &ctx.reason, 5934 pd->act.rtableid); 5935 *reason = ctx.reason; 5936 return (action); 5937 } 5938 5939 nat64 = pd->af != pd->naf; 5940 if (nat64) { 5941 int ret; 5942 5943 if (ctx.sk == NULL) 5944 ctx.sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE]; 5945 if (ctx.nk == NULL) 5946 ctx.nk = (*sm)->key[pd->dir == PF_IN ? 
PF_SK_WIRE : PF_SK_STACK]; 5947 /* nat64: rewrite the packet from the state key; source and destination roles are swapped for inbound packets */ 5948 if (pd->dir == PF_IN) { 5949 ret = pf_translate(pd, &ctx.sk->addr[pd->didx], 5950 ctx.sk->port[pd->didx], &ctx.sk->addr[pd->sidx], 5951 ctx.sk->port[pd->sidx], ctx.virtual_type, 5952 ctx.icmp_dir); 5953 } else { 5954 ret = pf_translate(pd, &ctx.sk->addr[pd->sidx], 5955 ctx.sk->port[pd->sidx], &ctx.sk->addr[pd->didx], 5956 ctx.sk->port[pd->didx], ctx.virtual_type, 5957 ctx.icmp_dir); 5958 } 5959 5960 if (ret < 0) { /* pf_create_state() succeeded above, so the match-rule items (their list head was copied into the state's match_rules), the state keys (ctx.sk/ctx.nk were re-pointed at (*sm)->key[] just above) and the UDP mapping (stored in the state) now belong to *sm. Detach them from ctx so that the cleanup path does not free memory the inserted state still references. */ SLIST_INIT(&ctx.rules); ctx.sk = ctx.nk = NULL; ctx.udp_mapping = NULL; 5961 goto cleanup; } 5962 5963 ctx.rewrite += ret; 5964 /* the translation changed the address family: report PF_AFRT instead of PF_PASS */ 5965 if (ctx.rewrite && ctx.sk->af != ctx.nk->af) 5966 action = PF_AFRT; 5967 } 5968 } else { /* no state is created for this packet: release the match-rule items, the translation state keys and the UDP mapping here */ 5969 while ((ri = SLIST_FIRST(&ctx.rules))) { 5970 SLIST_REMOVE_HEAD(&ctx.rules, entry); 5971 free(ri, M_PF_RULE_ITEM); 5972 } 5973 5974 uma_zfree(V_pf_state_key_z, ctx.sk); 5975 uma_zfree(V_pf_state_key_z, ctx.nk); 5976 ctx.sk = ctx.nk = NULL; 5977 pf_udp_mapping_release(ctx.udp_mapping); 5978 } 5979 5980 /* copy back packet headers if we performed NAT operations */ 5981 if (ctx.rewrite) 5982 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any); 5983 5984 if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) && 5985 pd->dir == PF_OUT && 5986 V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m)) { 5987 /* 5988 * We want the state created, but we dont 5989 * want to send this in case a partner 5990 * firewall has to know about it to allow 5991 * replies through it.
5992 */ 5993 *reason = ctx.reason; 5994 return (PF_DEFER); 5995 } 5996 5997 *reason = ctx.reason; 5998 return (action); 5999 6000 cleanup: 6001 while ((ri = SLIST_FIRST(&ctx.rules))) { 6002 SLIST_REMOVE_HEAD(&ctx.rules, entry); 6003 free(ri, M_PF_RULE_ITEM); 6004 } 6005 6006 uma_zfree(V_pf_state_key_z, ctx.sk); 6007 uma_zfree(V_pf_state_key_z, ctx.nk); 6008 pf_udp_mapping_release(ctx.udp_mapping); 6009 *reason = ctx.reason; 6010 6011 return (PF_DROP); 6012 } 6013 6014 static int 6015 pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, 6016 struct pf_kstate **sm, u_int16_t bproto_sum, u_int16_t bip_sum) 6017 { 6018 struct pf_pdesc *pd = ctx->pd; 6019 struct pf_kstate *s = NULL; 6020 struct pf_ksrc_node *sns[PF_SN_MAX] = { NULL }; 6021 /* 6022 * XXXKS: The hash for PF_SN_LIMIT and PF_SN_ROUTE should be the same 6023 * but for PF_SN_NAT it is different. Don't try optimizing it, 6024 * just store all 3 hashes. 6025 */ 6026 struct pf_srchash *snhs[PF_SN_MAX] = { NULL }; 6027 struct tcphdr *th = &pd->hdr.tcp; 6028 u_int16_t mss = V_tcp_mssdflt; 6029 u_short sn_reason; 6030 struct pf_krule_item *ri; 6031 6032 /* check maximums */ 6033 if (r->max_states && 6034 (counter_u64_fetch(r->states_cur) >= r->max_states)) { 6035 counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1); 6036 REASON_SET(&ctx->reason, PFRES_MAXSTATES); 6037 goto csfailed; 6038 } 6039 /* src node for limits */ 6040 if ((r->rule_flag & PFRULE_SRCTRACK) && 6041 (sn_reason = pf_insert_src_node(sns, snhs, r, pd->src, pd->af, 6042 NULL, NULL, PF_SN_LIMIT)) != 0) { 6043 REASON_SET(&ctx->reason, sn_reason); 6044 goto csfailed; 6045 } 6046 /* src node for route-to rule */ 6047 if (r->rt) { 6048 if ((r->route.opts & PF_POOL_STICKYADDR) && 6049 (sn_reason = pf_insert_src_node(sns, snhs, r, pd->src, 6050 pd->af, &pd->act.rt_addr, pd->act.rt_kif, 6051 PF_SN_ROUTE)) != 0) { 6052 REASON_SET(&ctx->reason, sn_reason); 6053 goto csfailed; 6054 } 6055 } 6056 /* src node for translation rule */ 6057 if (ctx->nr != 
NULL) { 6058 KASSERT(ctx->nat_pool != NULL, ("%s: nat_pool is NULL", __func__)); 6059 /* 6060 * The NAT addresses are chosen during ruleset parsing. 6061 * The new afto code stores post-nat addresses in nsaddr. 6062 * The old nat code (also used for new nat-to rules) creates 6063 * state keys and stores addresses in them. 6064 */ 6065 if ((ctx->nat_pool->opts & PF_POOL_STICKYADDR) && 6066 (sn_reason = pf_insert_src_node(sns, snhs, ctx->nr, 6067 ctx->sk ? &(ctx->sk->addr[pd->sidx]) : pd->src, pd->af, 6068 ctx->nk ? &(ctx->nk->addr[1]) : &(pd->nsaddr), NULL, 6069 PF_SN_NAT)) != 0 ) { 6070 REASON_SET(&ctx->reason, sn_reason); 6071 goto csfailed; 6072 } 6073 } 6074 s = pf_alloc_state(M_NOWAIT); 6075 if (s == NULL) { 6076 REASON_SET(&ctx->reason, PFRES_MEMORY); 6077 goto csfailed; 6078 } 6079 s->rule = r; 6080 s->nat_rule = ctx->nr; 6081 s->anchor = ctx->a; 6082 memcpy(&s->match_rules, &ctx->rules, sizeof(s->match_rules)); 6083 memcpy(&s->act, &pd->act, sizeof(struct pf_rule_actions)); 6084 6085 if (pd->act.allow_opts) 6086 s->state_flags |= PFSTATE_ALLOWOPTS; 6087 if (r->rule_flag & PFRULE_STATESLOPPY) 6088 s->state_flags |= PFSTATE_SLOPPY; 6089 if (pd->flags & PFDESC_TCP_NORM) /* Set by old-style scrub rules */ 6090 s->state_flags |= PFSTATE_SCRUB_TCP; 6091 if ((r->rule_flag & PFRULE_PFLOW) || 6092 (ctx->nr != NULL && ctx->nr->rule_flag & PFRULE_PFLOW)) 6093 s->state_flags |= PFSTATE_PFLOW; 6094 6095 s->act.log = pd->act.log & PF_LOG_ALL; 6096 s->sync_state = PFSYNC_S_NONE; 6097 s->state_flags |= pd->act.flags; /* Only needed for pfsync and state export */ 6098 6099 if (ctx->nr != NULL) 6100 s->act.log |= ctx->nr->log & PF_LOG_ALL; 6101 switch (pd->proto) { 6102 case IPPROTO_TCP: 6103 s->src.seqlo = ntohl(th->th_seq); 6104 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 6105 if ((tcp_get_flags(th) & (TH_SYN|TH_ACK)) == TH_SYN && 6106 r->keep_state == PF_STATE_MODULATE) { 6107 /* Generate sequence number modulator */ 6108 if ((s->src.seqdiff = pf_tcp_iss(pd) - 
s->src.seqlo) == 6109 0) 6110 s->src.seqdiff = 1; 6111 pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, 6112 htonl(s->src.seqlo + s->src.seqdiff), 0); 6113 ctx->rewrite = 1; 6114 } else 6115 s->src.seqdiff = 0; 6116 if (tcp_get_flags(th) & TH_SYN) { 6117 s->src.seqhi++; 6118 s->src.wscale = pf_get_wscale(pd); 6119 } 6120 s->src.max_win = MAX(ntohs(th->th_win), 1); 6121 if (s->src.wscale & PF_WSCALE_MASK) { 6122 /* Remove scale factor from initial window */ 6123 int win = s->src.max_win; 6124 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 6125 s->src.max_win = (win - 1) >> 6126 (s->src.wscale & PF_WSCALE_MASK); 6127 } 6128 if (tcp_get_flags(th) & TH_FIN) 6129 s->src.seqhi++; 6130 s->dst.seqhi = 1; 6131 s->dst.max_win = 1; 6132 pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT); 6133 pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED); 6134 s->timeout = PFTM_TCP_FIRST_PACKET; 6135 atomic_add_32(&V_pf_status.states_halfopen, 1); 6136 break; 6137 case IPPROTO_UDP: 6138 pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE); 6139 pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC); 6140 s->timeout = PFTM_UDP_FIRST_PACKET; 6141 break; 6142 case IPPROTO_SCTP: 6143 pf_set_protostate(s, PF_PEER_SRC, SCTP_COOKIE_WAIT); 6144 pf_set_protostate(s, PF_PEER_DST, SCTP_CLOSED); 6145 s->timeout = PFTM_SCTP_FIRST_PACKET; 6146 break; 6147 case IPPROTO_ICMP: 6148 #ifdef INET6 6149 case IPPROTO_ICMPV6: 6150 #endif /* INET6 */ 6151 s->timeout = PFTM_ICMP_FIRST_PACKET; 6152 break; 6153 default: 6154 pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE); 6155 pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC); 6156 s->timeout = PFTM_OTHER_FIRST_PACKET; 6157 } 6158 6159 s->creation = s->expire = pf_get_uptime(); 6160 6161 if (pd->proto == IPPROTO_TCP) { 6162 if (s->state_flags & PFSTATE_SCRUB_TCP && 6163 pf_normalize_tcp_init(pd, th, &s->src)) { 6164 REASON_SET(&ctx->reason, PFRES_MEMORY); 6165 goto csfailed; 6166 } 6167 if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && 6168 
pf_normalize_tcp_stateful(pd, &ctx->reason, th, s, 6169 &s->src, &s->dst, &ctx->rewrite)) { 6170 /* This really shouldn't happen!!! */ 6171 DPFPRINTF(PF_DEBUG_URGENT, 6172 "%s: tcp normalize failed on first " 6173 "pkt", __func__); 6174 goto csfailed; 6175 } 6176 } else if (pd->proto == IPPROTO_SCTP) { 6177 if (pf_normalize_sctp_init(pd, &s->src, &s->dst)) 6178 goto csfailed; 6179 if (! (pd->sctp_flags & (PFDESC_SCTP_INIT | PFDESC_SCTP_ADD_IP))) 6180 goto csfailed; 6181 } 6182 s->direction = pd->dir; 6183 6184 /* 6185 * sk/nk could already been setup by pf_get_translation(). 6186 */ 6187 if (ctx->sk == NULL && ctx->nk == NULL) { 6188 MPASS(pd->sport == NULL || (pd->osport == *pd->sport)); 6189 MPASS(pd->dport == NULL || (pd->odport == *pd->dport)); 6190 if (pf_state_key_setup(pd, pd->nsport, pd->ndport, 6191 &ctx->sk, &ctx->nk)) { 6192 goto csfailed; 6193 } 6194 } else 6195 KASSERT((ctx->sk != NULL && ctx->nk != NULL), ("%s: nr %p sk %p, nk %p", 6196 __func__, ctx->nr, ctx->sk, ctx->nk)); 6197 6198 /* Swap sk/nk for PF_OUT. */ 6199 if (pf_state_insert(BOUND_IFACE(s, pd), pd->kif, 6200 (pd->dir == PF_IN) ? ctx->sk : ctx->nk, 6201 (pd->dir == PF_IN) ? ctx->nk : ctx->sk, s)) { 6202 REASON_SET(&ctx->reason, PFRES_STATEINS); 6203 goto drop; 6204 } else 6205 *sm = s; 6206 ctx->sk = ctx->nk = NULL; 6207 6208 STATE_INC_COUNTERS(s); 6209 6210 /* 6211 * Lock order is important: first state, then source node. 
6212 */ 6213 for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) { 6214 if (pf_src_node_exists(&sns[sn_type], snhs[sn_type])) { 6215 s->sns[sn_type] = sns[sn_type]; 6216 PF_HASHROW_UNLOCK(snhs[sn_type]); 6217 } 6218 } 6219 6220 if (ctx->tag > 0) 6221 s->tag = ctx->tag; /* A bare inbound SYN on a synproxy rule: undo NAT, answer the client with our own SYN|ACK and return PF_SYNPROXY_DROP. The state has already been inserted and stored in *sm at this point. */ 6222 if (pd->proto == IPPROTO_TCP && (tcp_get_flags(th) & (TH_SYN|TH_ACK)) == 6223 TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { 6224 pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); 6225 pf_undo_nat(ctx->nr, pd, bip_sum); 6226 s->src.seqhi = arc4random(); 6227 /* Find mss option */ 6228 int rtid = M_GETFIB(pd->m); 6229 mss = pf_get_mss(pd); 6230 mss = pf_calc_mss(pd->src, pd->af, rtid, mss); 6231 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); 6232 s->src.mss = mss; 6233 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 6234 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 6235 TH_SYN|TH_ACK, 0, s->src.mss, 0, M_SKIP_FIREWALL, 0, 0, 6236 pd->act.rtableid); 6237 REASON_SET(&ctx->reason, PFRES_SYNPROXY); 6238 return (PF_SYNPROXY_DROP); 6239 } 6240 /* success: the state takes over the UDP mapping carried in the context */ 6241 s->udp_mapping = ctx->udp_mapping; 6242 6243 return (PF_PASS); 6244 /* csfailed: failure before the state was inserted. Frees the match-rule items, both state keys, and drops the reference on every source node taken above (unlinking nodes whose state count reaches zero with no expiry set). NOTE(review): once s has been allocated, s->match_rules holds a copy of the ctx->rules list head (memcpy earlier in this function); confirm that pf_free_state() under drop: does not walk the items already freed here. */ 6245 csfailed: 6246 while ((ri = SLIST_FIRST(&ctx->rules))) { 6247 SLIST_REMOVE_HEAD(&ctx->rules, entry); 6248 free(ri, M_PF_RULE_ITEM); 6249 } 6250 6251 uma_zfree(V_pf_state_key_z, ctx->sk); 6252 uma_zfree(V_pf_state_key_z, ctx->nk); 6253 6254 for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) { 6255 if (pf_src_node_exists(&sns[sn_type], snhs[sn_type])) { 6256 if (--sns[sn_type]->states == 0 && 6257 sns[sn_type]->expire == 0) { 6258 pf_unlink_src_node(sns[sn_type]); 6259 pf_free_src_node(sns[sn_type]); 6260 counter_u64_add( 6261 V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1); 6262 } 6263 PF_HASHROW_UNLOCK(snhs[sn_type]); 6264 } 6265 } 6266 /* drop: also reached directly when pf_state_insert() fails; frees the state if one was allocated and reports PF_DROP */ 6267 drop: 6268 if (s != NULL) { 6269 pf_src_tree_remove_state(s); 6270 s->timeout = PFTM_UNLINKED; 6271 pf_free_state(s); 6272 } 6273 6274 return (PF_DROP); 6275 } 6276 6277 int 6278
pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 6279 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 6280 int icmp_dir) 6281 { 6282 /* 6283 * pf_translate() implements OpenBSD's "new" NAT approach. 6284 * We don't follow it, because it involves a breaking syntax change 6285 * (removing nat/rdr rules, moving it into regular pf rules.) 6286 * It also moves NAT processing to be done after normal rules evaluation 6287 * whereas in FreeBSD that's done before rules processing. 6288 * 6289 * We adopt the function only for nat64, and keep other NAT processing 6290 * before rules processing. 6291 */ 6292 int rewrite = 0; 6293 int afto = pd->af != pd->naf; 6294 6295 MPASS(afto); 6296 6297 switch (pd->proto) { 6298 case IPPROTO_TCP: 6299 case IPPROTO_UDP: 6300 case IPPROTO_SCTP: 6301 if (afto || *pd->sport != sport) { 6302 pf_change_ap(pd, pd->src, pd->sport, 6303 saddr, sport); 6304 rewrite = 1; 6305 } 6306 if (afto || *pd->dport != dport) { 6307 pf_change_ap(pd, pd->dst, pd->dport, 6308 daddr, dport); 6309 rewrite = 1; 6310 } 6311 break; 6312 6313 #ifdef INET 6314 case IPPROTO_ICMP: 6315 /* pf_translate() is also used when logging invalid packets */ 6316 if (pd->af != AF_INET) 6317 return (0); 6318 6319 if (afto) { 6320 if (pf_translate_icmp_af(AF_INET6, &pd->hdr.icmp)) 6321 return (-1); 6322 pd->proto = IPPROTO_ICMPV6; 6323 rewrite = 1; 6324 } 6325 if (virtual_type == htons(ICMP_ECHO)) { 6326 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 6327 6328 if (icmpid != pd->hdr.icmp.icmp_id) { 6329 pd->hdr.icmp.icmp_cksum = pf_cksum_fixup( 6330 pd->hdr.icmp.icmp_cksum, 6331 pd->hdr.icmp.icmp_id, icmpid, 0); 6332 pd->hdr.icmp.icmp_id = icmpid; 6333 /* XXX TODO copyback. 
*/ 6334 rewrite = 1; 6335 } 6336 } 6337 break; 6338 #endif /* INET */ 6339 6340 #ifdef INET6 6341 case IPPROTO_ICMPV6: 6342 /* pf_translate() is also used when logging invalid packets */ 6343 if (pd->af != AF_INET6) 6344 return (0); 6345 6346 if (afto) { 6347 /* ip_sum will be recalculated in pf_translate_af */ /* NOTE(review): a pf_translate_icmp_af() failure returns 0 here, whereas the IPPROTO_ICMP branch of this function returns -1 for the same failure; the caller visible in this file only treats a negative result as an error, so an untranslatable ICMPv6 message is reported as "nothing rewritten" -- confirm whether this asymmetry is intended */ 6348 if (pf_translate_icmp_af(AF_INET, &pd->hdr.icmp6)) 6349 return (0); 6350 pd->proto = IPPROTO_ICMP; 6351 rewrite = 1; 6352 } 6353 break; 6354 #endif /* INET6 */ 6355 6356 default: 6357 break; 6358 } 6359 /* 1 if any header field was rewritten, 0 otherwise */ 6360 return (rewrite); 6361 } 6362 /* pf_translate_compat: apply the translation recorded in ctx->nk to the packet described by ctx->pd. For each of source and destination, when the address (or, for TCP/UDP/SCTP, the port) in nk differs from pd->nsaddr/pd->ndaddr/pd->nsport/pd->ndport, the header is rewritten and the pd->n* copies are refreshed. Returns the local rewrite count, which the visible code increments for TCP, UDP and ICMPv6 only. Both ctx->sk and ctx->nk must be non-NULL (asserted). */ 6363 int 6364 pf_translate_compat(struct pf_test_ctx *ctx) 6365 { 6366 struct pf_pdesc *pd = ctx->pd; 6367 struct pf_state_key *nk = ctx->nk; 6368 struct tcphdr *th = &pd->hdr.tcp; 6369 int rewrite = 0; 6370 6371 KASSERT(ctx->sk != NULL, ("%s: null sk", __func__)); 6372 KASSERT(ctx->nk != NULL, ("%s: null nk", __func__)); 6373 6374 switch (pd->proto) { 6375 case IPPROTO_TCP: 6376 if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) || 6377 nk->port[pd->sidx] != pd->nsport) { 6378 pf_change_ap(pd, pd->src, &th->th_sport, 6379 &nk->addr[pd->sidx], nk->port[pd->sidx]); 6380 pd->sport = &th->th_sport; 6381 pd->nsport = th->th_sport; 6382 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6383 } 6384 6385 if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || 6386 nk->port[pd->didx] != pd->ndport) { 6387 pf_change_ap(pd, pd->dst, &th->th_dport, 6388 &nk->addr[pd->didx], nk->port[pd->didx]); 6389 pd->dport = &th->th_dport; 6390 pd->ndport = th->th_dport; 6391 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6392 } /* counted even when neither address nor port differed */ 6393 rewrite++; 6394 break; 6395 case IPPROTO_UDP: 6396 if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) || 6397 nk->port[pd->sidx] != pd->nsport) { 6398 pf_change_ap(pd, pd->src, 6399 &pd->hdr.udp.uh_sport, 6400 &nk->addr[pd->sidx], 6401 nk->port[pd->sidx]); 6402 pd->sport = &pd->hdr.udp.uh_sport; 6403 pd->nsport = pd->hdr.udp.uh_sport; 6404 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6405 } 6406 6407 if
(PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || 6408 nk->port[pd->didx] != pd->ndport) { 6409 pf_change_ap(pd, pd->dst, 6410 &pd->hdr.udp.uh_dport, 6411 &nk->addr[pd->didx], 6412 nk->port[pd->didx]); 6413 pd->dport = &pd->hdr.udp.uh_dport; 6414 pd->ndport = pd->hdr.udp.uh_dport; 6415 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6416 } 6417 rewrite++; 6418 break; 6419 case IPPROTO_SCTP: { 6420 if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], pd->af) || 6421 nk->port[pd->sidx] != pd->nsport) { 6422 pf_change_ap(pd, pd->src, 6423 &pd->hdr.sctp.src_port, 6424 &nk->addr[pd->sidx], 6425 nk->port[pd->sidx]); 6426 pd->sport = &pd->hdr.sctp.src_port; 6427 pd->nsport = pd->hdr.sctp.src_port; 6428 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6429 } 6430 if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || 6431 nk->port[pd->didx] != pd->ndport) { 6432 pf_change_ap(pd, pd->dst, 6433 &pd->hdr.sctp.dest_port, 6434 &nk->addr[pd->didx], 6435 nk->port[pd->didx]); 6436 pd->dport = &pd->hdr.sctp.dest_port; 6437 pd->ndport = pd->hdr.sctp.dest_port; 6438 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6439 } 6440 break; 6441 } 6442 #ifdef INET 6443 case IPPROTO_ICMP: 6444 if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET)) { 6445 pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, 6446 nk->addr[pd->sidx].v4.s_addr, 0); 6447 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6448 } 6449 6450 if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET)) { 6451 pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, 6452 nk->addr[pd->didx].v4.s_addr, 0); 6453 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6454 } 6455 6456 if (ctx->virtual_type == htons(ICMP_ECHO) && 6457 nk->port[pd->sidx] != pd->hdr.icmp.icmp_id) { 6458 pd->hdr.icmp.icmp_cksum = pf_cksum_fixup( 6459 pd->hdr.icmp.icmp_cksum, pd->nsport, 6460 nk->port[pd->sidx], 0); 6461 pd->hdr.icmp.icmp_id = nk->port[pd->sidx]; 6462 pd->sport = &pd->hdr.icmp.icmp_id; 6463 } 6464 m_copyback(pd->m, pd->off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp); 6465 break; 6466 #endif /* INET */ 
6467 #ifdef INET6 6468 case IPPROTO_ICMPV6: /* ICMPv6: an address change is folded into the ICMPv6 checksum by pf_change_a6() */ 6469 if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) { 6470 pf_change_a6(pd->src, &pd->hdr.icmp6.icmp6_cksum, 6471 &nk->addr[pd->sidx], 0); 6472 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6473 } 6474 6475 if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) { 6476 pf_change_a6(pd->dst, &pd->hdr.icmp6.icmp6_cksum, 6477 &nk->addr[pd->didx], 0); 6478 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6479 } 6480 rewrite++; 6481 break; 6482 #endif /* INET6 */ 6483 default: /* any other protocol: only the IP addresses are translated */ 6484 switch (pd->af) { 6485 #ifdef INET 6486 case AF_INET: /* IPv4: pf_change_a() is handed pd->ip_sum together with the address being replaced */ 6487 if (PF_ANEQ(&pd->nsaddr, 6488 &nk->addr[pd->sidx], AF_INET)) { 6489 pf_change_a(&pd->src->v4.s_addr, 6490 pd->ip_sum, 6491 nk->addr[pd->sidx].v4.s_addr, 0); 6492 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6493 } 6494 6495 if (PF_ANEQ(&pd->ndaddr, 6496 &nk->addr[pd->didx], AF_INET)) { 6497 pf_change_a(&pd->dst->v4.s_addr, 6498 pd->ip_sum, 6499 nk->addr[pd->didx].v4.s_addr, 0); 6500 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6501 } 6502 break; 6503 #endif /* INET */ 6504 #ifdef INET6 6505 case AF_INET6: /* IPv6: the addresses are copied over directly, no checksum is touched in this branch */ 6506 if (PF_ANEQ(&pd->nsaddr, 6507 &nk->addr[pd->sidx], AF_INET6)) { 6508 pf_addrcpy(&pd->nsaddr, &nk->addr[pd->sidx], 6509 pd->af); 6510 pf_addrcpy(pd->src, &nk->addr[pd->sidx], pd->af); 6511 } 6512 6513 if (PF_ANEQ(&pd->ndaddr, 6514 &nk->addr[pd->didx], AF_INET6)) { 6515 pf_addrcpy(&pd->ndaddr, &nk->addr[pd->didx], 6516 pd->af); 6517 pf_addrcpy(pd->dst, &nk->addr[pd->didx], 6518 pd->af); 6519 } 6520 break; 6521 #endif /* INET6 */ 6522 } 6523 break; 6524 } 6525 return (rewrite); 6526 } 6527 6528 static int 6529 pf_tcp_track_full(struct pf_kstate *state, struct pf_pdesc *pd, 6530 u_short *reason, int *copyback, struct pf_state_peer *src, 6531 struct pf_state_peer *dst, u_int8_t psrc, u_int8_t pdst) 6532 { 6533 struct tcphdr *th = &pd->hdr.tcp; 6534 u_int16_t win = ntohs(th->th_win); 6535 u_int32_t ack, end, data_end, seq, orig_seq; 6536 u_int8_t sws, dws; 6537 int ackskew; 6538 6539 if
(src->wscale && dst->wscale && !(tcp_get_flags(th) & TH_SYN)) { 6540 sws = src->wscale & PF_WSCALE_MASK; 6541 dws = dst->wscale & PF_WSCALE_MASK; 6542 } else 6543 sws = dws = 0; 6544 6545 /* 6546 * Sequence tracking algorithm from Guido van Rooij's paper: 6547 * http://www.madison-gurkha.com/publications/tcp_filtering/ 6548 * tcp_filtering.ps 6549 */ 6550 6551 orig_seq = seq = ntohl(th->th_seq); 6552 if (src->seqlo == 0) { 6553 /* First packet from this end. Set its state */ 6554 6555 if ((state->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 6556 src->scrub == NULL) { 6557 if (pf_normalize_tcp_init(pd, th, src)) { 6558 REASON_SET(reason, PFRES_MEMORY); 6559 return (PF_DROP); 6560 } 6561 } 6562 6563 /* Deferred generation of sequence number modulator */ 6564 if (dst->seqdiff && !src->seqdiff) { 6565 /* use random iss for the TCP server */ 6566 while ((src->seqdiff = arc4random() - seq) == 0) 6567 ; 6568 ack = ntohl(th->th_ack) - dst->seqdiff; 6569 pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, htonl(seq + 6570 src->seqdiff), 0); 6571 pf_change_proto_a(pd->m, &th->th_ack, &th->th_sum, htonl(ack), 0); 6572 *copyback = 1; 6573 } else { 6574 ack = ntohl(th->th_ack); 6575 } 6576 6577 end = seq + pd->p_len; 6578 if (tcp_get_flags(th) & TH_SYN) { 6579 end++; 6580 if (dst->wscale & PF_WSCALE_FLAG) { 6581 src->wscale = pf_get_wscale(pd); 6582 if (src->wscale & PF_WSCALE_FLAG) { 6583 /* Remove scale factor from initial 6584 * window */ 6585 sws = src->wscale & PF_WSCALE_MASK; 6586 win = ((u_int32_t)win + (1 << sws) - 1) 6587 >> sws; 6588 dws = dst->wscale & PF_WSCALE_MASK; 6589 } else { 6590 /* fixup other window */ 6591 dst->max_win = MIN(TCP_MAXWIN, 6592 (u_int32_t)dst->max_win << 6593 (dst->wscale & PF_WSCALE_MASK)); 6594 /* in case of a retrans SYN|ACK */ 6595 dst->wscale = 0; 6596 } 6597 } 6598 } 6599 data_end = end; 6600 if (tcp_get_flags(th) & TH_FIN) 6601 end++; 6602 6603 src->seqlo = seq; 6604 if (src->state < TCPS_SYN_SENT) 6605 pf_set_protostate(state, 
psrc, TCPS_SYN_SENT); 6606 6607 /* 6608 * May need to slide the window (seqhi may have been set by 6609 * the crappy stack check or if we picked up the connection 6610 * after establishment) 6611 */ 6612 if (src->seqhi == 1 || 6613 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 6614 src->seqhi = end + MAX(1, dst->max_win << dws); 6615 if (win > src->max_win) 6616 src->max_win = win; 6617 6618 } else { 6619 ack = ntohl(th->th_ack) - dst->seqdiff; 6620 if (src->seqdiff) { 6621 /* Modulate sequence numbers */ 6622 pf_change_proto_a(pd->m, &th->th_seq, &th->th_sum, htonl(seq + 6623 src->seqdiff), 0); 6624 pf_change_proto_a(pd->m, &th->th_ack, &th->th_sum, htonl(ack), 0); 6625 *copyback = 1; 6626 } 6627 end = seq + pd->p_len; 6628 if (tcp_get_flags(th) & TH_SYN) 6629 end++; 6630 data_end = end; 6631 if (tcp_get_flags(th) & TH_FIN) 6632 end++; 6633 } 6634 6635 if ((tcp_get_flags(th) & TH_ACK) == 0) { 6636 /* Let it pass through the ack skew check */ 6637 ack = dst->seqlo; 6638 } else if ((ack == 0 && 6639 (tcp_get_flags(th) & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || 6640 /* broken tcp stacks do not set ack */ 6641 (dst->state < TCPS_SYN_SENT)) { 6642 /* 6643 * Many stacks (ours included) will set the ACK number in an 6644 * FIN|ACK if the SYN times out -- no sequence to ACK. 6645 */ 6646 ack = dst->seqlo; 6647 } 6648 6649 if (seq == end) { 6650 /* Ease sequencing restrictions on no data packets */ 6651 seq = src->seqlo; 6652 data_end = end = seq; 6653 } 6654 6655 ackskew = dst->seqlo - ack; 6656 6657 /* 6658 * Need to demodulate the sequence numbers in any TCP SACK options 6659 * (Selective ACK). We could optionally validate the SACK values 6660 * against the current ACK window, either forwards or backwards, but 6661 * I'm not confident that SACK has been implemented properly 6662 * everywhere. It wouldn't surprise me if several stacks accidentally 6663 * SACK too far backwards of previously ACKed data. 
There really aren't 6664 * any security implications of bad SACKing unless the target stack 6665 * doesn't validate the option length correctly. Someone trying to 6666 * spoof into a TCP connection won't bother blindly sending SACK 6667 * options anyway. 6668 */ 6669 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 6670 if (pf_modulate_sack(pd, th, dst)) 6671 *copyback = 1; 6672 } 6673 6674 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 6675 if (SEQ_GEQ(src->seqhi, data_end) && 6676 /* Last octet inside other's window space */ 6677 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 6678 /* Retrans: not more than one window back */ 6679 (ackskew >= -MAXACKWINDOW) && 6680 /* Acking not more than one reassembled fragment backwards */ 6681 (ackskew <= (MAXACKWINDOW << sws)) && 6682 /* Acking not more than one window forward */ 6683 ((tcp_get_flags(th) & TH_RST) == 0 || orig_seq == src->seqlo || 6684 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) { 6685 /* Require an exact/+1 sequence match on resets when possible */ 6686 6687 if (dst->scrub || src->scrub) { 6688 if (pf_normalize_tcp_stateful(pd, reason, th, 6689 state, src, dst, copyback)) 6690 return (PF_DROP); 6691 } 6692 6693 /* update max window */ 6694 if (src->max_win < win) 6695 src->max_win = win; 6696 /* synchronize sequencing */ 6697 if (SEQ_GT(end, src->seqlo)) 6698 src->seqlo = end; 6699 /* slide the window of what the other end can send */ 6700 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 6701 dst->seqhi = ack + MAX((win << sws), 1); 6702 6703 /* update states */ 6704 if (tcp_get_flags(th) & TH_SYN) 6705 if (src->state < TCPS_SYN_SENT) 6706 pf_set_protostate(state, psrc, TCPS_SYN_SENT); 6707 if (tcp_get_flags(th) & TH_FIN) 6708 if (src->state < TCPS_CLOSING) 6709 pf_set_protostate(state, psrc, TCPS_CLOSING); 6710 if (tcp_get_flags(th) & TH_ACK) { 6711 if (dst->state == TCPS_SYN_SENT) { 6712 pf_set_protostate(state, pdst, 6713 TCPS_ESTABLISHED); 
6714 if (src->state == TCPS_ESTABLISHED && 6715 state->sns[PF_SN_LIMIT] != NULL && 6716 pf_src_connlimit(state)) { 6717 REASON_SET(reason, PFRES_SRCLIMIT); 6718 return (PF_DROP); 6719 } 6720 } else if (dst->state == TCPS_CLOSING) 6721 pf_set_protostate(state, pdst, 6722 TCPS_FIN_WAIT_2); 6723 } 6724 if (tcp_get_flags(th) & TH_RST) 6725 pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT); 6726 6727 /* update expire time */ 6728 state->expire = pf_get_uptime(); 6729 if (src->state >= TCPS_FIN_WAIT_2 && 6730 dst->state >= TCPS_FIN_WAIT_2) 6731 state->timeout = PFTM_TCP_CLOSED; 6732 else if (src->state >= TCPS_CLOSING && 6733 dst->state >= TCPS_CLOSING) 6734 state->timeout = PFTM_TCP_FIN_WAIT; 6735 else if (src->state < TCPS_ESTABLISHED || 6736 dst->state < TCPS_ESTABLISHED) 6737 state->timeout = PFTM_TCP_OPENING; 6738 else if (src->state >= TCPS_CLOSING || 6739 dst->state >= TCPS_CLOSING) 6740 state->timeout = PFTM_TCP_CLOSING; 6741 else 6742 state->timeout = PFTM_TCP_ESTABLISHED; 6743 6744 /* Fall through to PASS packet */ 6745 6746 } else if ((dst->state < TCPS_SYN_SENT || 6747 dst->state >= TCPS_FIN_WAIT_2 || 6748 src->state >= TCPS_FIN_WAIT_2) && 6749 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 6750 /* Within a window forward of the originating packet */ 6751 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 6752 /* Within a window backward of the originating packet */ 6753 6754 /* 6755 * This currently handles three situations: 6756 * 1) Stupid stacks will shotgun SYNs before their peer 6757 * replies. 6758 * 2) When PF catches an already established stream (the 6759 * firewall rebooted, the state table was flushed, routes 6760 * changed...) 6761 * 3) Packets get funky immediately after the connection 6762 * closes (this should catch Solaris spurious ACK|FINs 6763 * that web servers like to spew after a close) 6764 * 6765 * This must be a little more careful than the above code 6766 * since packet floods will also be caught here. 
We don't 6767 * update the TTL here to mitigate the damage of a packet 6768 * flood and so the same code can handle awkward establishment 6769 * and a loosened connection close. 6770 * In the establishment case, a correct peer response will 6771 * validate the connection, go through the normal state code 6772 * and keep updating the state TTL. 6773 */ 6774 6775 if (V_pf_status.debug >= PF_DEBUG_MISC) { 6776 printf("pf: loose state match: "); 6777 pf_print_state(state); 6778 pf_print_flags(tcp_get_flags(th)); 6779 printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " 6780 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 6781 pd->p_len, ackskew, (unsigned long long)state->packets[0], 6782 (unsigned long long)state->packets[1], 6783 pd->dir == PF_IN ? "in" : "out", 6784 pd->dir == state->direction ? "fwd" : "rev"); 6785 } 6786 6787 if (dst->scrub || src->scrub) { 6788 if (pf_normalize_tcp_stateful(pd, reason, th, 6789 state, src, dst, copyback)) 6790 return (PF_DROP); 6791 } 6792 6793 /* update max window */ 6794 if (src->max_win < win) 6795 src->max_win = win; 6796 /* synchronize sequencing */ 6797 if (SEQ_GT(end, src->seqlo)) 6798 src->seqlo = end; 6799 /* slide the window of what the other end can send */ 6800 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 6801 dst->seqhi = ack + MAX((win << sws), 1); 6802 6803 /* 6804 * Cannot set dst->seqhi here since this could be a shotgunned 6805 * SYN and not an already established connection. 
6806 */ 6807 6808 if (tcp_get_flags(th) & TH_FIN) 6809 if (src->state < TCPS_CLOSING) 6810 pf_set_protostate(state, psrc, TCPS_CLOSING); 6811 if (tcp_get_flags(th) & TH_RST) 6812 pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT); 6813 6814 /* Fall through to PASS packet */ 6815 6816 } else { 6817 if (state->dst.state == TCPS_SYN_SENT && 6818 state->src.state == TCPS_SYN_SENT) { 6819 /* Send RST for state mismatches during handshake */ 6820 if (!(tcp_get_flags(th) & TH_RST)) 6821 pf_send_tcp(state->rule, pd->af, 6822 pd->dst, pd->src, th->th_dport, 6823 th->th_sport, ntohl(th->th_ack), 0, 6824 TH_RST, 0, 0, 6825 state->rule->return_ttl, M_SKIP_FIREWALL, 6826 0, 0, state->act.rtableid); 6827 src->seqlo = 0; 6828 src->seqhi = 1; 6829 src->max_win = 1; 6830 } else if (V_pf_status.debug >= PF_DEBUG_MISC) { 6831 printf("pf: BAD state: "); 6832 pf_print_state(state); 6833 pf_print_flags(tcp_get_flags(th)); 6834 printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " 6835 "pkts=%llu:%llu dir=%s,%s\n", 6836 seq, orig_seq, ack, pd->p_len, ackskew, 6837 (unsigned long long)state->packets[0], 6838 (unsigned long long)state->packets[1], 6839 pd->dir == PF_IN ? "in" : "out", 6840 pd->dir == state->direction ? "fwd" : "rev"); 6841 printf("pf: State failure on: %c %c %c %c | %c %c\n", 6842 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1', 6843 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 6844 ' ': '2', 6845 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 6846 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 6847 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?' ' :'5', 6848 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); 6849 } 6850 REASON_SET(reason, PFRES_BADSTATE); 6851 return (PF_DROP); 6852 } 6853 6854 return (PF_PASS); 6855 } 6856 6857 static int 6858 pf_tcp_track_sloppy(struct pf_kstate *state, struct pf_pdesc *pd, 6859 u_short *reason, struct pf_state_peer *src, struct pf_state_peer *dst, 6860 u_int8_t psrc, u_int8_t pdst) 6861 { 6862 struct tcphdr *th = &pd->hdr.tcp; 6863 6864 if (tcp_get_flags(th) & TH_SYN) 6865 if (src->state < TCPS_SYN_SENT) 6866 pf_set_protostate(state, psrc, TCPS_SYN_SENT); 6867 if (tcp_get_flags(th) & TH_FIN) 6868 if (src->state < TCPS_CLOSING) 6869 pf_set_protostate(state, psrc, TCPS_CLOSING); 6870 if (tcp_get_flags(th) & TH_ACK) { 6871 if (dst->state == TCPS_SYN_SENT) { 6872 pf_set_protostate(state, pdst, TCPS_ESTABLISHED); 6873 if (src->state == TCPS_ESTABLISHED && 6874 state->sns[PF_SN_LIMIT] != NULL && 6875 pf_src_connlimit(state)) { 6876 REASON_SET(reason, PFRES_SRCLIMIT); 6877 return (PF_DROP); 6878 } 6879 } else if (dst->state == TCPS_CLOSING) { 6880 pf_set_protostate(state, pdst, TCPS_FIN_WAIT_2); 6881 } else if (src->state == TCPS_SYN_SENT && 6882 dst->state < TCPS_SYN_SENT) { 6883 /* 6884 * Handle a special sloppy case where we only see one 6885 * half of the connection. If there is a ACK after 6886 * the initial SYN without ever seeing a packet from 6887 * the destination, set the connection to established. 6888 */ 6889 pf_set_protostate(state, PF_PEER_BOTH, 6890 TCPS_ESTABLISHED); 6891 dst->state = src->state = TCPS_ESTABLISHED; 6892 if (state->sns[PF_SN_LIMIT] != NULL && 6893 pf_src_connlimit(state)) { 6894 REASON_SET(reason, PFRES_SRCLIMIT); 6895 return (PF_DROP); 6896 } 6897 } else if (src->state == TCPS_CLOSING && 6898 dst->state == TCPS_ESTABLISHED && 6899 dst->seqlo == 0) { 6900 /* 6901 * Handle the closing of half connections where we 6902 * don't see the full bidirectional FIN/ACK+ACK 6903 * handshake. 
6904 */ 6905 pf_set_protostate(state, pdst, TCPS_CLOSING); 6906 } 6907 } 6908 if (tcp_get_flags(th) & TH_RST) 6909 pf_set_protostate(state, PF_PEER_BOTH, TCPS_TIME_WAIT); 6910 6911 /* update expire time */ 6912 state->expire = pf_get_uptime(); 6913 if (src->state >= TCPS_FIN_WAIT_2 && 6914 dst->state >= TCPS_FIN_WAIT_2) 6915 state->timeout = PFTM_TCP_CLOSED; 6916 else if (src->state >= TCPS_CLOSING && 6917 dst->state >= TCPS_CLOSING) 6918 state->timeout = PFTM_TCP_FIN_WAIT; 6919 else if (src->state < TCPS_ESTABLISHED || 6920 dst->state < TCPS_ESTABLISHED) 6921 state->timeout = PFTM_TCP_OPENING; 6922 else if (src->state >= TCPS_CLOSING || 6923 dst->state >= TCPS_CLOSING) 6924 state->timeout = PFTM_TCP_CLOSING; 6925 else 6926 state->timeout = PFTM_TCP_ESTABLISHED; 6927 6928 return (PF_PASS); 6929 } 6930 6931 static int 6932 pf_synproxy(struct pf_pdesc *pd, struct pf_kstate *state, u_short *reason) 6933 { 6934 struct pf_state_key *sk = state->key[pd->didx]; 6935 struct tcphdr *th = &pd->hdr.tcp; 6936 6937 if (state->src.state == PF_TCPS_PROXY_SRC) { 6938 if (pd->dir != state->direction) { 6939 REASON_SET(reason, PFRES_SYNPROXY); 6940 return (PF_SYNPROXY_DROP); 6941 } 6942 if (tcp_get_flags(th) & TH_SYN) { 6943 if (ntohl(th->th_seq) != state->src.seqlo) { 6944 REASON_SET(reason, PFRES_SYNPROXY); 6945 return (PF_DROP); 6946 } 6947 pf_send_tcp(state->rule, pd->af, pd->dst, 6948 pd->src, th->th_dport, th->th_sport, 6949 state->src.seqhi, ntohl(th->th_seq) + 1, 6950 TH_SYN|TH_ACK, 0, state->src.mss, 0, 6951 M_SKIP_FIREWALL, 0, 0, state->act.rtableid); 6952 REASON_SET(reason, PFRES_SYNPROXY); 6953 return (PF_SYNPROXY_DROP); 6954 } else if ((tcp_get_flags(th) & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 6955 (ntohl(th->th_ack) != state->src.seqhi + 1) || 6956 (ntohl(th->th_seq) != state->src.seqlo + 1)) { 6957 REASON_SET(reason, PFRES_SYNPROXY); 6958 return (PF_DROP); 6959 } else if (state->sns[PF_SN_LIMIT] != NULL && 6960 pf_src_connlimit(state)) { 6961 REASON_SET(reason, 
PFRES_SRCLIMIT); 6962 return (PF_DROP); 6963 } else 6964 pf_set_protostate(state, PF_PEER_SRC, 6965 PF_TCPS_PROXY_DST); 6966 } 6967 if (state->src.state == PF_TCPS_PROXY_DST) { 6968 if (pd->dir == state->direction) { 6969 if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) != TH_ACK) || 6970 (ntohl(th->th_ack) != state->src.seqhi + 1) || 6971 (ntohl(th->th_seq) != state->src.seqlo + 1)) { 6972 REASON_SET(reason, PFRES_SYNPROXY); 6973 return (PF_DROP); 6974 } 6975 state->src.max_win = MAX(ntohs(th->th_win), 1); 6976 if (state->dst.seqhi == 1) 6977 state->dst.seqhi = arc4random(); 6978 pf_send_tcp(state->rule, pd->af, 6979 &sk->addr[pd->sidx], &sk->addr[pd->didx], 6980 sk->port[pd->sidx], sk->port[pd->didx], 6981 state->dst.seqhi, 0, TH_SYN, 0, 6982 state->src.mss, 0, 6983 state->orig_kif->pfik_ifp == V_loif ? M_LOOP : 0, 6984 state->tag, 0, state->act.rtableid); 6985 REASON_SET(reason, PFRES_SYNPROXY); 6986 return (PF_SYNPROXY_DROP); 6987 } else if (((tcp_get_flags(th) & (TH_SYN|TH_ACK)) != 6988 (TH_SYN|TH_ACK)) || 6989 (ntohl(th->th_ack) != state->dst.seqhi + 1)) { 6990 REASON_SET(reason, PFRES_SYNPROXY); 6991 return (PF_DROP); 6992 } else { 6993 state->dst.max_win = MAX(ntohs(th->th_win), 1); 6994 state->dst.seqlo = ntohl(th->th_seq); 6995 pf_send_tcp(state->rule, pd->af, pd->dst, 6996 pd->src, th->th_dport, th->th_sport, 6997 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 6998 TH_ACK, state->src.max_win, 0, 0, 0, 6999 state->tag, 0, state->act.rtableid); 7000 pf_send_tcp(state->rule, pd->af, 7001 &sk->addr[pd->sidx], &sk->addr[pd->didx], 7002 sk->port[pd->sidx], sk->port[pd->didx], 7003 state->src.seqhi + 1, state->src.seqlo + 1, 7004 TH_ACK, state->dst.max_win, 0, 0, 7005 M_SKIP_FIREWALL, 0, 0, state->act.rtableid); 7006 state->src.seqdiff = state->dst.seqhi - 7007 state->src.seqlo; 7008 state->dst.seqdiff = state->src.seqhi - 7009 state->dst.seqlo; 7010 state->src.seqhi = state->src.seqlo + 7011 state->dst.max_win; 7012 state->dst.seqhi = state->dst.seqlo + 7013 
state->src.max_win; 7014 state->src.wscale = state->dst.wscale = 0; 7015 pf_set_protostate(state, PF_PEER_BOTH, 7016 TCPS_ESTABLISHED); 7017 REASON_SET(reason, PFRES_SYNPROXY); 7018 return (PF_SYNPROXY_DROP); 7019 } 7020 } 7021 7022 return (PF_PASS); 7023 } 7024 7025 static int 7026 pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason) 7027 { 7028 struct pf_state_key_cmp key; 7029 int copyback = 0; 7030 struct pf_state_peer *src, *dst; 7031 uint8_t psrc, pdst; 7032 int action; 7033 7034 bzero(&key, sizeof(key)); 7035 key.af = pd->af; 7036 key.proto = pd->virtual_proto; 7037 pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af); 7038 pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af); 7039 key.port[pd->sidx] = pd->osport; 7040 key.port[pd->didx] = pd->odport; 7041 7042 action = pf_find_state(pd, &key, state); 7043 if (action != PF_MATCH) 7044 return (action); 7045 7046 action = PF_PASS; 7047 if (pd->dir == (*state)->direction) { 7048 if (PF_REVERSED_KEY(*state, pd->af)) { 7049 src = &(*state)->dst; 7050 dst = &(*state)->src; 7051 psrc = PF_PEER_DST; 7052 pdst = PF_PEER_SRC; 7053 } else { 7054 src = &(*state)->src; 7055 dst = &(*state)->dst; 7056 psrc = PF_PEER_SRC; 7057 pdst = PF_PEER_DST; 7058 } 7059 } else { 7060 if (PF_REVERSED_KEY(*state, pd->af)) { 7061 src = &(*state)->src; 7062 dst = &(*state)->dst; 7063 psrc = PF_PEER_SRC; 7064 pdst = PF_PEER_DST; 7065 } else { 7066 src = &(*state)->dst; 7067 dst = &(*state)->src; 7068 psrc = PF_PEER_DST; 7069 pdst = PF_PEER_SRC; 7070 } 7071 } 7072 7073 switch (pd->virtual_proto) { 7074 case IPPROTO_TCP: { 7075 struct tcphdr *th = &pd->hdr.tcp; 7076 7077 if ((action = pf_synproxy(pd, *state, reason)) != PF_PASS) 7078 return (action); 7079 if (((tcp_get_flags(th) & (TH_SYN | TH_ACK)) == TH_SYN) || 7080 ((th->th_flags & (TH_SYN | TH_ACK | TH_RST)) == TH_ACK && 7081 pf_syncookie_check(pd) && pd->dir == PF_IN)) { 7082 if ((*state)->src.state >= TCPS_FIN_WAIT_2 && 7083 (*state)->dst.state >= TCPS_FIN_WAIT_2) { 
7084 if (V_pf_status.debug >= PF_DEBUG_MISC) { 7085 printf("pf: state reuse "); 7086 pf_print_state(*state); 7087 pf_print_flags(tcp_get_flags(th)); 7088 printf("\n"); 7089 } 7090 /* XXX make sure it's the same direction ?? */ 7091 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED); 7092 pf_remove_state(*state); 7093 *state = NULL; 7094 return (PF_DROP); 7095 } else if ((*state)->src.state >= TCPS_ESTABLISHED && 7096 (*state)->dst.state >= TCPS_ESTABLISHED) { 7097 /* 7098 * SYN matches existing state??? 7099 * Typically happens when sender boots up after 7100 * sudden panic. Certain protocols (NFSv3) are 7101 * always using same port numbers. Challenge 7102 * ACK enables all parties (firewall and peers) 7103 * to get in sync again. 7104 */ 7105 pf_send_challenge_ack(pd, *state, src, dst); 7106 return (PF_DROP); 7107 } 7108 } 7109 if ((*state)->state_flags & PFSTATE_SLOPPY) { 7110 if (pf_tcp_track_sloppy(*state, pd, reason, src, dst, 7111 psrc, pdst) == PF_DROP) 7112 return (PF_DROP); 7113 } else { 7114 int ret; 7115 7116 ret = pf_tcp_track_full(*state, pd, reason, 7117 ©back, src, dst, psrc, pdst); 7118 if (ret == PF_DROP) 7119 return (PF_DROP); 7120 } 7121 break; 7122 } 7123 case IPPROTO_UDP: 7124 /* update states */ 7125 if (src->state < PFUDPS_SINGLE) 7126 pf_set_protostate(*state, psrc, PFUDPS_SINGLE); 7127 if (dst->state == PFUDPS_SINGLE) 7128 pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE); 7129 7130 /* update expire time */ 7131 (*state)->expire = pf_get_uptime(); 7132 if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) 7133 (*state)->timeout = PFTM_UDP_MULTIPLE; 7134 else 7135 (*state)->timeout = PFTM_UDP_SINGLE; 7136 break; 7137 case IPPROTO_SCTP: 7138 if ((src->state >= SCTP_SHUTDOWN_SENT || src->state == SCTP_CLOSED) && 7139 (dst->state >= SCTP_SHUTDOWN_SENT || dst->state == SCTP_CLOSED) && 7140 pd->sctp_flags & PFDESC_SCTP_INIT) { 7141 pf_set_protostate(*state, PF_PEER_BOTH, SCTP_CLOSED); 7142 pf_remove_state(*state); 7143 *state = 
NULL; 7144 return (PF_DROP); 7145 } 7146 7147 if (pf_sctp_track(*state, pd, reason) != PF_PASS) 7148 return (PF_DROP); 7149 7150 /* Track state. */ 7151 if (pd->sctp_flags & PFDESC_SCTP_INIT) { 7152 if (src->state < SCTP_COOKIE_WAIT) { 7153 pf_set_protostate(*state, psrc, SCTP_COOKIE_WAIT); 7154 (*state)->timeout = PFTM_SCTP_OPENING; 7155 } 7156 } 7157 if (pd->sctp_flags & PFDESC_SCTP_INIT_ACK) { 7158 MPASS(dst->scrub != NULL); 7159 if (dst->scrub->pfss_v_tag == 0) 7160 dst->scrub->pfss_v_tag = pd->sctp_initiate_tag; 7161 } 7162 7163 /* 7164 * Bind to the correct interface if we're if-bound. For multihomed 7165 * extra associations we don't know which interface that will be until 7166 * here, so we've inserted the state on V_pf_all. Fix that now. 7167 */ 7168 if ((*state)->kif == V_pfi_all && 7169 (*state)->rule->rule_flag & PFRULE_IFBOUND) 7170 (*state)->kif = pd->kif; 7171 7172 if (pd->sctp_flags & (PFDESC_SCTP_COOKIE | PFDESC_SCTP_HEARTBEAT_ACK)) { 7173 if (src->state < SCTP_ESTABLISHED) { 7174 pf_set_protostate(*state, psrc, SCTP_ESTABLISHED); 7175 (*state)->timeout = PFTM_SCTP_ESTABLISHED; 7176 } 7177 } 7178 if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN | 7179 PFDESC_SCTP_SHUTDOWN_COMPLETE)) { 7180 if (src->state < SCTP_SHUTDOWN_PENDING) { 7181 pf_set_protostate(*state, psrc, SCTP_SHUTDOWN_PENDING); 7182 (*state)->timeout = PFTM_SCTP_CLOSING; 7183 } 7184 } 7185 if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN_COMPLETE | PFDESC_SCTP_ABORT)) { 7186 pf_set_protostate(*state, psrc, SCTP_CLOSED); 7187 (*state)->timeout = PFTM_SCTP_CLOSED; 7188 } 7189 7190 (*state)->expire = pf_get_uptime(); 7191 break; 7192 default: 7193 /* update states */ 7194 if (src->state < PFOTHERS_SINGLE) 7195 pf_set_protostate(*state, psrc, PFOTHERS_SINGLE); 7196 if (dst->state == PFOTHERS_SINGLE) 7197 pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE); 7198 7199 /* update expire time */ 7200 (*state)->expire = pf_get_uptime(); 7201 if (src->state == PFOTHERS_MULTIPLE && dst->state == 
PFOTHERS_MULTIPLE) 7202 (*state)->timeout = PFTM_OTHER_MULTIPLE; 7203 else 7204 (*state)->timeout = PFTM_OTHER_SINGLE; 7205 break; 7206 } 7207 7208 /* translate source/destination address, if necessary */ 7209 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 7210 struct pf_state_key *nk; 7211 int afto, sidx, didx; 7212 7213 if (PF_REVERSED_KEY(*state, pd->af)) 7214 nk = (*state)->key[pd->sidx]; 7215 else 7216 nk = (*state)->key[pd->didx]; 7217 7218 afto = pd->af != nk->af; 7219 7220 if (afto && (*state)->direction == PF_IN) { 7221 sidx = pd->didx; 7222 didx = pd->sidx; 7223 } else { 7224 sidx = pd->sidx; 7225 didx = pd->didx; 7226 } 7227 7228 if (afto) { 7229 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); 7230 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); 7231 pd->naf = nk->af; 7232 action = PF_AFRT; 7233 } 7234 7235 if (afto || PF_ANEQ(pd->src, &nk->addr[sidx], pd->af) || 7236 nk->port[sidx] != pd->osport) 7237 pf_change_ap(pd, pd->src, pd->sport, 7238 &nk->addr[sidx], nk->port[sidx]); 7239 7240 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 7241 nk->port[didx] != pd->odport) 7242 pf_change_ap(pd, pd->dst, pd->dport, 7243 &nk->addr[didx], nk->port[didx]); 7244 7245 copyback = 1; 7246 } 7247 7248 if (copyback && pd->hdrlen > 0) 7249 m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any); 7250 7251 return (action); 7252 } 7253 7254 static int 7255 pf_sctp_track(struct pf_kstate *state, struct pf_pdesc *pd, 7256 u_short *reason) 7257 { 7258 struct pf_state_peer *src; 7259 if (pd->dir == state->direction) { 7260 if (PF_REVERSED_KEY(state, pd->af)) 7261 src = &state->dst; 7262 else 7263 src = &state->src; 7264 } else { 7265 if (PF_REVERSED_KEY(state, pd->af)) 7266 src = &state->src; 7267 else 7268 src = &state->dst; 7269 } 7270 7271 if (src->scrub != NULL) { 7272 if (src->scrub->pfss_v_tag == 0) 7273 src->scrub->pfss_v_tag = pd->hdr.sctp.v_tag; 7274 else if (src->scrub->pfss_v_tag != pd->hdr.sctp.v_tag) 7275 return (PF_DROP); 7276 } 7277 
7278 return (PF_PASS); 7279 } 7280 7281 static void 7282 pf_sctp_multihome_detach_addr(const struct pf_kstate *s) 7283 { 7284 struct pf_sctp_endpoint key; 7285 struct pf_sctp_endpoint *ep; 7286 struct pf_state_key *sks = s->key[PF_SK_STACK]; 7287 struct pf_sctp_source *i, *tmp; 7288 7289 if (sks == NULL || sks->proto != IPPROTO_SCTP || s->dst.scrub == NULL) 7290 return; 7291 7292 PF_SCTP_ENDPOINTS_LOCK(); 7293 7294 key.v_tag = s->dst.scrub->pfss_v_tag; 7295 ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key); 7296 if (ep != NULL) { 7297 TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) { 7298 if (pf_addr_cmp(&i->addr, 7299 &s->key[PF_SK_WIRE]->addr[s->direction == PF_OUT], 7300 s->key[PF_SK_WIRE]->af) == 0) { 7301 SDT_PROBE3(pf, sctp, multihome, remove, 7302 key.v_tag, s, i); 7303 TAILQ_REMOVE(&ep->sources, i, entry); 7304 free(i, M_PFTEMP); 7305 break; 7306 } 7307 } 7308 7309 if (TAILQ_EMPTY(&ep->sources)) { 7310 RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep); 7311 free(ep, M_PFTEMP); 7312 } 7313 } 7314 7315 /* Other direction. 
*/ 7316 key.v_tag = s->src.scrub->pfss_v_tag; 7317 ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key); 7318 if (ep != NULL) { 7319 TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) { 7320 if (pf_addr_cmp(&i->addr, 7321 &s->key[PF_SK_WIRE]->addr[s->direction == PF_IN], 7322 s->key[PF_SK_WIRE]->af) == 0) { 7323 SDT_PROBE3(pf, sctp, multihome, remove, 7324 key.v_tag, s, i); 7325 TAILQ_REMOVE(&ep->sources, i, entry); 7326 free(i, M_PFTEMP); 7327 break; 7328 } 7329 } 7330 7331 if (TAILQ_EMPTY(&ep->sources)) { 7332 RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep); 7333 free(ep, M_PFTEMP); 7334 } 7335 } 7336 7337 PF_SCTP_ENDPOINTS_UNLOCK(); 7338 } 7339 7340 static void 7341 pf_sctp_multihome_add_addr(struct pf_pdesc *pd, struct pf_addr *a, uint32_t v_tag) 7342 { 7343 struct pf_sctp_endpoint key = { 7344 .v_tag = v_tag, 7345 }; 7346 struct pf_sctp_source *i; 7347 struct pf_sctp_endpoint *ep; 7348 int count; 7349 7350 PF_SCTP_ENDPOINTS_LOCK(); 7351 7352 ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key); 7353 if (ep == NULL) { 7354 ep = malloc(sizeof(struct pf_sctp_endpoint), 7355 M_PFTEMP, M_NOWAIT); 7356 if (ep == NULL) { 7357 PF_SCTP_ENDPOINTS_UNLOCK(); 7358 return; 7359 } 7360 7361 ep->v_tag = v_tag; 7362 TAILQ_INIT(&ep->sources); 7363 RB_INSERT(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep); 7364 } 7365 7366 /* Avoid inserting duplicates. */ 7367 count = 0; 7368 TAILQ_FOREACH(i, &ep->sources, entry) { 7369 count++; 7370 if (pf_addr_cmp(&i->addr, a, pd->af) == 0) { 7371 PF_SCTP_ENDPOINTS_UNLOCK(); 7372 return; 7373 } 7374 } 7375 7376 /* Limit the number of addresses per endpoint. 
*/ 7377 if (count >= PF_SCTP_MAX_ENDPOINTS) { 7378 PF_SCTP_ENDPOINTS_UNLOCK(); 7379 return; 7380 } 7381 7382 i = malloc(sizeof(*i), M_PFTEMP, M_NOWAIT); 7383 if (i == NULL) { 7384 PF_SCTP_ENDPOINTS_UNLOCK(); 7385 return; 7386 } 7387 7388 i->af = pd->af; 7389 memcpy(&i->addr, a, sizeof(*a)); 7390 TAILQ_INSERT_TAIL(&ep->sources, i, entry); 7391 SDT_PROBE2(pf, sctp, multihome, add, v_tag, i); 7392 7393 PF_SCTP_ENDPOINTS_UNLOCK(); 7394 } 7395 7396 static void 7397 pf_sctp_multihome_delayed(struct pf_pdesc *pd, struct pfi_kkif *kif, 7398 struct pf_kstate *s, int action) 7399 { 7400 struct pf_sctp_multihome_job *j, *tmp; 7401 struct pf_sctp_source *i; 7402 int ret __unused; 7403 struct pf_kstate *sm = NULL; 7404 struct pf_krule *ra = NULL; 7405 struct pf_krule *r = &V_pf_default_rule; 7406 struct pf_kruleset *rs = NULL; 7407 u_short reason; 7408 bool do_extra = true; 7409 7410 PF_RULES_RLOCK_TRACKER; 7411 7412 again: 7413 TAILQ_FOREACH_SAFE(j, &pd->sctp_multihome_jobs, next, tmp) { 7414 if (s == NULL || action != PF_PASS) 7415 goto free; 7416 7417 /* Confirm we don't recurse here. */ 7418 MPASS(! (pd->sctp_flags & PFDESC_SCTP_ADD_IP)); 7419 7420 switch (j->op) { 7421 case SCTP_ADD_IP_ADDRESS: { 7422 uint32_t v_tag = pd->sctp_initiate_tag; 7423 7424 if (v_tag == 0) { 7425 if (s->direction == pd->dir) 7426 v_tag = s->src.scrub->pfss_v_tag; 7427 else 7428 v_tag = s->dst.scrub->pfss_v_tag; 7429 } 7430 7431 /* 7432 * Avoid duplicating states. We'll already have 7433 * created a state based on the source address of 7434 * the packet, but SCTP endpoints may also list this 7435 * address again in the INIT(_ACK) parameters. 
7436 */ 7437 if (pf_addr_cmp(&j->src, pd->src, pd->af) == 0) { 7438 break; 7439 } 7440 7441 j->pd.sctp_flags |= PFDESC_SCTP_ADD_IP; 7442 PF_RULES_RLOCK(); 7443 sm = NULL; 7444 if (s->rule->rule_flag & PFRULE_ALLOW_RELATED) { 7445 j->pd.related_rule = s->rule; 7446 } 7447 ret = pf_test_rule(&r, &sm, 7448 &j->pd, &ra, &rs, &reason, NULL); 7449 PF_RULES_RUNLOCK(); 7450 SDT_PROBE4(pf, sctp, multihome, test, kif, r, j->pd.m, ret); 7451 if (ret != PF_DROP && sm != NULL) { 7452 /* Inherit v_tag values. */ 7453 if (sm->direction == s->direction) { 7454 sm->src.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag; 7455 sm->dst.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag; 7456 } else { 7457 sm->src.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag; 7458 sm->dst.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag; 7459 } 7460 PF_STATE_UNLOCK(sm); 7461 } else { 7462 /* If we try duplicate inserts? */ 7463 break; 7464 } 7465 7466 /* Only add the address if we've actually allowed the state. */ 7467 pf_sctp_multihome_add_addr(pd, &j->src, v_tag); 7468 7469 if (! do_extra) { 7470 break; 7471 } 7472 /* 7473 * We need to do this for each of our source addresses. 7474 * Find those based on the verification tag. 7475 */ 7476 struct pf_sctp_endpoint key = { 7477 .v_tag = pd->hdr.sctp.v_tag, 7478 }; 7479 struct pf_sctp_endpoint *ep; 7480 7481 PF_SCTP_ENDPOINTS_LOCK(); 7482 ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key); 7483 if (ep == NULL) { 7484 PF_SCTP_ENDPOINTS_UNLOCK(); 7485 break; 7486 } 7487 MPASS(ep != NULL); 7488 7489 TAILQ_FOREACH(i, &ep->sources, entry) { 7490 struct pf_sctp_multihome_job *nj; 7491 7492 /* SCTP can intermingle IPv4 and IPv6. */ 7493 if (i->af != pd->af) 7494 continue; 7495 7496 nj = malloc(sizeof(*nj), M_PFTEMP, M_NOWAIT | M_ZERO); 7497 if (! nj) { 7498 continue; 7499 } 7500 memcpy(&nj->pd, &j->pd, sizeof(j->pd)); 7501 memcpy(&nj->src, &j->src, sizeof(nj->src)); 7502 nj->pd.src = &nj->src; 7503 // New destination address! 
7504 memcpy(&nj->dst, &i->addr, sizeof(nj->dst)); 7505 nj->pd.dst = &nj->dst; 7506 nj->pd.m = j->pd.m; 7507 nj->op = j->op; 7508 7509 TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, nj, next); 7510 } 7511 PF_SCTP_ENDPOINTS_UNLOCK(); 7512 7513 break; 7514 } 7515 case SCTP_DEL_IP_ADDRESS: { 7516 struct pf_state_key_cmp key; 7517 uint8_t psrc; 7518 int action; 7519 7520 bzero(&key, sizeof(key)); 7521 key.af = j->pd.af; 7522 key.proto = IPPROTO_SCTP; 7523 if (j->pd.dir == PF_IN) { /* wire side, straight */ 7524 pf_addrcpy(&key.addr[0], j->pd.src, key.af); 7525 pf_addrcpy(&key.addr[1], j->pd.dst, key.af); 7526 key.port[0] = j->pd.hdr.sctp.src_port; 7527 key.port[1] = j->pd.hdr.sctp.dest_port; 7528 } else { /* stack side, reverse */ 7529 pf_addrcpy(&key.addr[1], j->pd.src, key.af); 7530 pf_addrcpy(&key.addr[0], j->pd.dst, key.af); 7531 key.port[1] = j->pd.hdr.sctp.src_port; 7532 key.port[0] = j->pd.hdr.sctp.dest_port; 7533 } 7534 7535 action = pf_find_state(&j->pd, &key, &sm); 7536 if (action == PF_MATCH) { 7537 PF_STATE_LOCK_ASSERT(sm); 7538 if (j->pd.dir == sm->direction) { 7539 psrc = PF_PEER_SRC; 7540 } else { 7541 psrc = PF_PEER_DST; 7542 } 7543 pf_set_protostate(sm, psrc, SCTP_SHUTDOWN_PENDING); 7544 sm->timeout = PFTM_SCTP_CLOSING; 7545 PF_STATE_UNLOCK(sm); 7546 } 7547 break; 7548 default: 7549 panic("Unknown op %#x", j->op); 7550 } 7551 } 7552 7553 free: 7554 TAILQ_REMOVE(&pd->sctp_multihome_jobs, j, next); 7555 free(j, M_PFTEMP); 7556 } 7557 7558 /* We may have inserted extra work while processing the list. */ 7559 if (! 
TAILQ_EMPTY(&pd->sctp_multihome_jobs)) { 7560 do_extra = false; 7561 goto again; 7562 } 7563 } 7564 7565 static int 7566 pf_multihome_scan(int start, int len, struct pf_pdesc *pd, int op) 7567 { 7568 int off = 0; 7569 struct pf_sctp_multihome_job *job; 7570 7571 SDT_PROBE4(pf, sctp, multihome_scan, entry, start, len, pd, op); 7572 7573 while (off < len) { 7574 struct sctp_paramhdr h; 7575 7576 if (!pf_pull_hdr(pd->m, start + off, &h, sizeof(h), NULL, NULL, 7577 pd->af)) 7578 return (PF_DROP); 7579 7580 /* Parameters are at least 4 bytes. */ 7581 if (ntohs(h.param_length) < 4) 7582 return (PF_DROP); 7583 7584 SDT_PROBE2(pf, sctp, multihome_scan, param, ntohs(h.param_type), 7585 ntohs(h.param_length)); 7586 7587 switch (ntohs(h.param_type)) { 7588 case SCTP_IPV4_ADDRESS: { 7589 struct in_addr t; 7590 7591 if (ntohs(h.param_length) != 7592 (sizeof(struct sctp_paramhdr) + sizeof(t))) 7593 return (PF_DROP); 7594 7595 if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t), 7596 NULL, NULL, pd->af)) 7597 return (PF_DROP); 7598 7599 if (in_nullhost(t)) 7600 t.s_addr = pd->src->v4.s_addr; 7601 7602 /* 7603 * We hold the state lock (idhash) here, which means 7604 * that we can't acquire the keyhash, or we'll get a 7605 * LOR (and potentially double-lock things too). We also 7606 * can't release the state lock here, so instead we'll 7607 * enqueue this for async handling. 7608 * There's a relatively small race here, in that a 7609 * packet using the new addresses could arrive already, 7610 * but that's just though luck for it. 7611 */ 7612 job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO); 7613 if (! job) 7614 return (PF_DROP); 7615 7616 SDT_PROBE2(pf, sctp, multihome_scan, ipv4, &t, op); 7617 7618 memcpy(&job->pd, pd, sizeof(*pd)); 7619 7620 // New source address! 
7621 memcpy(&job->src, &t, sizeof(t)); 7622 job->pd.src = &job->src; 7623 memcpy(&job->dst, pd->dst, sizeof(job->dst)); 7624 job->pd.dst = &job->dst; 7625 job->pd.m = pd->m; 7626 job->op = op; 7627 7628 TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next); 7629 break; 7630 } 7631 #ifdef INET6 7632 case SCTP_IPV6_ADDRESS: { 7633 struct in6_addr t; 7634 7635 if (ntohs(h.param_length) != 7636 (sizeof(struct sctp_paramhdr) + sizeof(t))) 7637 return (PF_DROP); 7638 7639 if (!pf_pull_hdr(pd->m, start + off + sizeof(h), &t, sizeof(t), 7640 NULL, NULL, pd->af)) 7641 return (PF_DROP); 7642 if (memcmp(&t, &pd->src->v6, sizeof(t)) == 0) 7643 break; 7644 if (memcmp(&t, &in6addr_any, sizeof(t)) == 0) 7645 memcpy(&t, &pd->src->v6, sizeof(t)); 7646 7647 job = malloc(sizeof(*job), M_PFTEMP, M_NOWAIT | M_ZERO); 7648 if (! job) 7649 return (PF_DROP); 7650 7651 SDT_PROBE2(pf, sctp, multihome_scan, ipv6, &t, op); 7652 7653 memcpy(&job->pd, pd, sizeof(*pd)); 7654 memcpy(&job->src, &t, sizeof(t)); 7655 job->pd.src = &job->src; 7656 memcpy(&job->dst, pd->dst, sizeof(job->dst)); 7657 job->pd.dst = &job->dst; 7658 job->pd.m = pd->m; 7659 job->op = op; 7660 7661 TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, job, next); 7662 break; 7663 } 7664 #endif /* INET6 */ 7665 case SCTP_ADD_IP_ADDRESS: { 7666 int ret; 7667 struct sctp_asconf_paramhdr ah; 7668 7669 if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah), 7670 NULL, NULL, pd->af)) 7671 return (PF_DROP); 7672 7673 ret = pf_multihome_scan(start + off + sizeof(ah), 7674 ntohs(ah.ph.param_length) - sizeof(ah), pd, 7675 SCTP_ADD_IP_ADDRESS); 7676 if (ret != PF_PASS) 7677 return (ret); 7678 break; 7679 } 7680 case SCTP_DEL_IP_ADDRESS: { 7681 int ret; 7682 struct sctp_asconf_paramhdr ah; 7683 7684 if (!pf_pull_hdr(pd->m, start + off, &ah, sizeof(ah), 7685 NULL, NULL, pd->af)) 7686 return (PF_DROP); 7687 ret = pf_multihome_scan(start + off + sizeof(ah), 7688 ntohs(ah.ph.param_length) - sizeof(ah), pd, 7689 SCTP_DEL_IP_ADDRESS); 7690 if (ret != 
PF_PASS) 7691 return (ret); 7692 break; 7693 } 7694 default: 7695 break; 7696 } 7697 7698 off += roundup(ntohs(h.param_length), 4); 7699 } 7700 7701 return (PF_PASS); 7702 } 7703 7704 int 7705 pf_multihome_scan_init(int start, int len, struct pf_pdesc *pd) 7706 { 7707 start += sizeof(struct sctp_init_chunk); 7708 len -= sizeof(struct sctp_init_chunk); 7709 7710 return (pf_multihome_scan(start, len, pd, SCTP_ADD_IP_ADDRESS)); 7711 } 7712 7713 int 7714 pf_multihome_scan_asconf(int start, int len, struct pf_pdesc *pd) 7715 { 7716 start += sizeof(struct sctp_asconf_chunk); 7717 len -= sizeof(struct sctp_asconf_chunk); 7718 7719 return (pf_multihome_scan(start, len, pd, SCTP_ADD_IP_ADDRESS)); 7720 } 7721 7722 int 7723 pf_icmp_state_lookup(struct pf_state_key_cmp *key, struct pf_pdesc *pd, 7724 struct pf_kstate **state, u_int16_t icmpid, u_int16_t type, int icmp_dir, 7725 int *iidx, int multi, int inner) 7726 { 7727 int action, direction = pd->dir; 7728 7729 key->af = pd->af; 7730 key->proto = pd->proto; 7731 if (icmp_dir == PF_IN) { 7732 *iidx = pd->sidx; 7733 key->port[pd->sidx] = icmpid; 7734 key->port[pd->didx] = type; 7735 } else { 7736 *iidx = pd->didx; 7737 key->port[pd->sidx] = type; 7738 key->port[pd->didx] = icmpid; 7739 } 7740 if (pf_state_key_addr_setup(pd, key, multi)) 7741 return (PF_DROP); 7742 7743 action = pf_find_state(pd, key, state); 7744 if (action != PF_MATCH) 7745 return (action); 7746 7747 if ((*state)->state_flags & PFSTATE_SLOPPY) 7748 return (-1); 7749 7750 /* Is this ICMP message flowing in right direction? */ 7751 if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af) 7752 direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ? 7753 PF_IN : PF_OUT; 7754 else 7755 direction = (*state)->direction; 7756 if ((*state)->rule->type && 7757 (((!inner && direction == pd->dir) || 7758 (inner && direction != pd->dir)) ? 
7759 PF_IN : PF_OUT) != icmp_dir) { 7760 if (V_pf_status.debug >= PF_DEBUG_MISC) { 7761 printf("pf: icmp type %d in wrong direction (%d): ", 7762 ntohs(type), icmp_dir); 7763 pf_print_state(*state); 7764 printf("\n"); 7765 } 7766 PF_STATE_UNLOCK(*state); 7767 *state = NULL; 7768 return (PF_DROP); 7769 } 7770 return (-1); 7771 } 7772 7773 static int 7774 pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, 7775 u_short *reason) 7776 { 7777 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 7778 u_int16_t *icmpsum, virtual_id, virtual_type; 7779 u_int8_t icmptype, icmpcode; 7780 int icmp_dir, iidx, ret; 7781 struct pf_state_key_cmp key; 7782 #ifdef INET 7783 u_int16_t icmpid; 7784 #endif /* INET*/ 7785 7786 MPASS(*state == NULL); 7787 7788 bzero(&key, sizeof(key)); 7789 switch (pd->proto) { 7790 #ifdef INET 7791 case IPPROTO_ICMP: 7792 icmptype = pd->hdr.icmp.icmp_type; 7793 icmpcode = pd->hdr.icmp.icmp_code; 7794 icmpid = pd->hdr.icmp.icmp_id; 7795 icmpsum = &pd->hdr.icmp.icmp_cksum; 7796 break; 7797 #endif /* INET */ 7798 #ifdef INET6 7799 case IPPROTO_ICMPV6: 7800 icmptype = pd->hdr.icmp6.icmp6_type; 7801 icmpcode = pd->hdr.icmp6.icmp6_code; 7802 #ifdef INET 7803 icmpid = pd->hdr.icmp6.icmp6_id; 7804 #endif /* INET */ 7805 icmpsum = &pd->hdr.icmp6.icmp6_cksum; 7806 break; 7807 #endif /* INET6 */ 7808 default: 7809 panic("unhandled proto %d", pd->proto); 7810 } 7811 7812 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id, 7813 &virtual_type) == 0) { 7814 /* 7815 * ICMP query/reply message not related to a TCP/UDP/SCTP 7816 * packet. Search for an ICMP state. 7817 */ 7818 ret = pf_icmp_state_lookup(&key, pd, state, virtual_id, 7819 virtual_type, icmp_dir, &iidx, 0, 0); 7820 /* IPv6? 
try matching a multicast address */ 7821 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) { 7822 MPASS(*state == NULL); 7823 ret = pf_icmp_state_lookup(&key, pd, state, 7824 virtual_id, virtual_type, 7825 icmp_dir, &iidx, 1, 0); 7826 } 7827 if (ret >= 0) { 7828 MPASS(*state == NULL); 7829 return (ret); 7830 } 7831 7832 (*state)->expire = pf_get_uptime(); 7833 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 7834 7835 /* translate source/destination address, if necessary */ 7836 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 7837 struct pf_state_key *nk; 7838 int afto, sidx, didx; 7839 7840 if (PF_REVERSED_KEY(*state, pd->af)) 7841 nk = (*state)->key[pd->sidx]; 7842 else 7843 nk = (*state)->key[pd->didx]; 7844 7845 afto = pd->af != nk->af; 7846 7847 if (afto && (*state)->direction == PF_IN) { 7848 sidx = pd->didx; 7849 didx = pd->sidx; 7850 iidx = !iidx; 7851 } else { 7852 sidx = pd->sidx; 7853 didx = pd->didx; 7854 } 7855 7856 switch (pd->af) { 7857 #ifdef INET 7858 case AF_INET: 7859 #ifdef INET6 7860 if (afto) { 7861 if (pf_translate_icmp_af(AF_INET6, 7862 &pd->hdr.icmp)) 7863 return (PF_DROP); 7864 pd->proto = IPPROTO_ICMPV6; 7865 } 7866 #endif /* INET6 */ 7867 if (!afto && 7868 PF_ANEQ(pd->src, &nk->addr[sidx], AF_INET)) 7869 pf_change_a(&saddr->v4.s_addr, 7870 pd->ip_sum, 7871 nk->addr[sidx].v4.s_addr, 7872 0); 7873 7874 if (!afto && PF_ANEQ(pd->dst, 7875 &nk->addr[didx], AF_INET)) 7876 pf_change_a(&daddr->v4.s_addr, 7877 pd->ip_sum, 7878 nk->addr[didx].v4.s_addr, 0); 7879 7880 if (nk->port[iidx] != 7881 pd->hdr.icmp.icmp_id) { 7882 pd->hdr.icmp.icmp_cksum = 7883 pf_cksum_fixup( 7884 pd->hdr.icmp.icmp_cksum, icmpid, 7885 nk->port[iidx], 0); 7886 pd->hdr.icmp.icmp_id = 7887 nk->port[iidx]; 7888 } 7889 7890 m_copyback(pd->m, pd->off, ICMP_MINLEN, 7891 (caddr_t )&pd->hdr.icmp); 7892 break; 7893 #endif /* INET */ 7894 #ifdef INET6 7895 case AF_INET6: 7896 #ifdef INET 7897 if (afto) { 7898 if (pf_translate_icmp_af(AF_INET, 7899 
&pd->hdr.icmp6)) 7900 return (PF_DROP); 7901 pd->proto = IPPROTO_ICMP; 7902 } 7903 #endif /* INET */ 7904 if (!afto && 7905 PF_ANEQ(pd->src, &nk->addr[sidx], AF_INET6)) 7906 pf_change_a6(saddr, 7907 &pd->hdr.icmp6.icmp6_cksum, 7908 &nk->addr[sidx], 0); 7909 7910 if (!afto && PF_ANEQ(pd->dst, 7911 &nk->addr[didx], AF_INET6)) 7912 pf_change_a6(daddr, 7913 &pd->hdr.icmp6.icmp6_cksum, 7914 &nk->addr[didx], 0); 7915 7916 if (nk->port[iidx] != pd->hdr.icmp6.icmp6_id) 7917 pd->hdr.icmp6.icmp6_id = 7918 nk->port[iidx]; 7919 7920 m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr), 7921 (caddr_t )&pd->hdr.icmp6); 7922 break; 7923 #endif /* INET6 */ 7924 } 7925 if (afto) { 7926 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], 7927 nk->af); 7928 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], 7929 nk->af); 7930 pd->naf = nk->af; 7931 return (PF_AFRT); 7932 } 7933 } 7934 return (PF_PASS); 7935 7936 } else { 7937 /* 7938 * ICMP error message in response to a TCP/UDP packet. 7939 * Extract the inner TCP/UDP header and search for that state. 7940 */ 7941 7942 struct pf_pdesc pd2; 7943 bzero(&pd2, sizeof pd2); 7944 #ifdef INET 7945 struct ip h2; 7946 #endif /* INET */ 7947 #ifdef INET6 7948 struct ip6_hdr h2_6; 7949 #endif /* INET6 */ 7950 int ipoff2 = 0; 7951 7952 pd2.af = pd->af; 7953 pd2.dir = pd->dir; 7954 /* Payload packet is from the opposite direction. */ 7955 pd2.sidx = (pd->dir == PF_IN) ? 1 : 0; 7956 pd2.didx = (pd->dir == PF_IN) ? 
0 : 1; 7957 pd2.m = pd->m; 7958 pd2.pf_mtag = pd->pf_mtag; 7959 pd2.kif = pd->kif; 7960 switch (pd->af) { 7961 #ifdef INET 7962 case AF_INET: 7963 /* offset of h2 in mbuf chain */ 7964 ipoff2 = pd->off + ICMP_MINLEN; 7965 7966 if (!pf_pull_hdr(pd->m, ipoff2, &h2, sizeof(h2), 7967 NULL, reason, pd2.af)) { 7968 DPFPRINTF(PF_DEBUG_MISC, 7969 "pf: ICMP error message too short " 7970 "(ip)"); 7971 return (PF_DROP); 7972 } 7973 /* 7974 * ICMP error messages don't refer to non-first 7975 * fragments 7976 */ 7977 if (h2.ip_off & htons(IP_OFFMASK)) { 7978 REASON_SET(reason, PFRES_FRAG); 7979 return (PF_DROP); 7980 } 7981 7982 /* offset of protocol header that follows h2 */ 7983 pd2.off = ipoff2; 7984 if (pf_walk_header(&pd2, &h2, reason) != PF_PASS) 7985 return (PF_DROP); 7986 7987 pd2.tot_len = ntohs(h2.ip_len); 7988 pd2.src = (struct pf_addr *)&h2.ip_src; 7989 pd2.dst = (struct pf_addr *)&h2.ip_dst; 7990 pd2.ip_sum = &h2.ip_sum; 7991 break; 7992 #endif /* INET */ 7993 #ifdef INET6 7994 case AF_INET6: 7995 ipoff2 = pd->off + sizeof(struct icmp6_hdr); 7996 7997 if (!pf_pull_hdr(pd->m, ipoff2, &h2_6, sizeof(h2_6), 7998 NULL, reason, pd2.af)) { 7999 DPFPRINTF(PF_DEBUG_MISC, 8000 "pf: ICMP error message too short " 8001 "(ip6)"); 8002 return (PF_DROP); 8003 } 8004 pd2.off = ipoff2; 8005 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS) 8006 return (PF_DROP); 8007 8008 pd2.tot_len = ntohs(h2_6.ip6_plen) + 8009 sizeof(struct ip6_hdr); 8010 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 8011 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 8012 pd2.ip_sum = NULL; 8013 break; 8014 #endif /* INET6 */ 8015 default: 8016 unhandled_af(pd->af); 8017 } 8018 8019 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) { 8020 if (V_pf_status.debug >= PF_DEBUG_MISC) { 8021 printf("pf: BAD ICMP %d:%d outer dst: ", 8022 icmptype, icmpcode); 8023 pf_print_host(pd->src, 0, pd->af); 8024 printf(" -> "); 8025 pf_print_host(pd->dst, 0, pd->af); 8026 printf(" inner src: "); 8027 pf_print_host(pd2.src, 0, pd2.af); 8028 
printf(" -> "); 8029 pf_print_host(pd2.dst, 0, pd2.af); 8030 printf("\n"); 8031 } 8032 REASON_SET(reason, PFRES_BADSTATE); 8033 return (PF_DROP); 8034 } 8035 8036 switch (pd2.proto) { 8037 case IPPROTO_TCP: { 8038 struct tcphdr *th = &pd2.hdr.tcp; 8039 u_int32_t seq; 8040 struct pf_state_peer *src, *dst; 8041 u_int8_t dws; 8042 int copyback = 0; 8043 int action; 8044 8045 /* 8046 * Only the first 8 bytes of the TCP header can be 8047 * expected. Don't access any TCP header fields after 8048 * th_seq, an ackskew test is not possible. 8049 */ 8050 if (!pf_pull_hdr(pd->m, pd2.off, th, 8, NULL, reason, 8051 pd2.af)) { 8052 DPFPRINTF(PF_DEBUG_MISC, 8053 "pf: ICMP error message too short " 8054 "(tcp)"); 8055 return (PF_DROP); 8056 } 8057 pd2.pcksum = &pd2.hdr.tcp.th_sum; 8058 8059 key.af = pd2.af; 8060 key.proto = IPPROTO_TCP; 8061 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 8062 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 8063 key.port[pd2.sidx] = th->th_sport; 8064 key.port[pd2.didx] = th->th_dport; 8065 8066 action = pf_find_state(&pd2, &key, state); 8067 if (action != PF_MATCH) 8068 return (action); 8069 8070 if (pd->dir == (*state)->direction) { 8071 if (PF_REVERSED_KEY(*state, pd->af)) { 8072 src = &(*state)->src; 8073 dst = &(*state)->dst; 8074 } else { 8075 src = &(*state)->dst; 8076 dst = &(*state)->src; 8077 } 8078 } else { 8079 if (PF_REVERSED_KEY(*state, pd->af)) { 8080 src = &(*state)->dst; 8081 dst = &(*state)->src; 8082 } else { 8083 src = &(*state)->src; 8084 dst = &(*state)->dst; 8085 } 8086 } 8087 8088 if (src->wscale && dst->wscale) 8089 dws = dst->wscale & PF_WSCALE_MASK; 8090 else 8091 dws = 0; 8092 8093 /* Demodulate sequence number */ 8094 seq = ntohl(th->th_seq) - src->seqdiff; 8095 if (src->seqdiff) { 8096 pf_change_a(&th->th_seq, icmpsum, 8097 htonl(seq), 0); 8098 copyback = 1; 8099 } 8100 8101 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 8102 (!SEQ_GEQ(src->seqhi, seq) || 8103 !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) 
{ 8104 if (V_pf_status.debug >= PF_DEBUG_MISC) { 8105 printf("pf: BAD ICMP %d:%d ", 8106 icmptype, icmpcode); 8107 pf_print_host(pd->src, 0, pd->af); 8108 printf(" -> "); 8109 pf_print_host(pd->dst, 0, pd->af); 8110 printf(" state: "); 8111 pf_print_state(*state); 8112 printf(" seq=%u\n", seq); 8113 } 8114 REASON_SET(reason, PFRES_BADSTATE); 8115 return (PF_DROP); 8116 } else { 8117 if (V_pf_status.debug >= PF_DEBUG_MISC) { 8118 printf("pf: OK ICMP %d:%d ", 8119 icmptype, icmpcode); 8120 pf_print_host(pd->src, 0, pd->af); 8121 printf(" -> "); 8122 pf_print_host(pd->dst, 0, pd->af); 8123 printf(" state: "); 8124 pf_print_state(*state); 8125 printf(" seq=%u\n", seq); 8126 } 8127 } 8128 8129 /* translate source/destination address, if necessary */ 8130 if ((*state)->key[PF_SK_WIRE] != 8131 (*state)->key[PF_SK_STACK]) { 8132 8133 struct pf_state_key *nk; 8134 8135 if (PF_REVERSED_KEY(*state, pd->af)) 8136 nk = (*state)->key[pd->sidx]; 8137 else 8138 nk = (*state)->key[pd->didx]; 8139 8140 #if defined(INET) && defined(INET6) 8141 int afto, sidx, didx; 8142 8143 afto = pd->af != nk->af; 8144 8145 if (afto && (*state)->direction == PF_IN) { 8146 sidx = pd2.didx; 8147 didx = pd2.sidx; 8148 } else { 8149 sidx = pd2.sidx; 8150 didx = pd2.didx; 8151 } 8152 8153 if (afto) { 8154 if (pf_translate_icmp_af(nk->af, 8155 &pd->hdr.icmp)) 8156 return (PF_DROP); 8157 m_copyback(pd->m, pd->off, 8158 sizeof(struct icmp6_hdr), 8159 (c_caddr_t)&pd->hdr.icmp6); 8160 if (pf_change_icmp_af(pd->m, ipoff2, pd, 8161 &pd2, &nk->addr[sidx], 8162 &nk->addr[didx], pd->af, 8163 nk->af)) 8164 return (PF_DROP); 8165 pf_addrcpy(&pd->nsaddr, 8166 &nk->addr[pd2.sidx], nk->af); 8167 pf_addrcpy(&pd->ndaddr, 8168 &nk->addr[pd2.didx], nk->af); 8169 if (nk->af == AF_INET) { 8170 pd->proto = IPPROTO_ICMP; 8171 } else { 8172 pd->proto = IPPROTO_ICMPV6; 8173 /* 8174 * IPv4 becomes IPv6 so we must 8175 * copy IPv4 src addr to least 8176 * 32bits in IPv6 address to 8177 * keep traceroute/icmp 8178 * working. 
8179 */ 8180 pd->nsaddr.addr32[3] = 8181 pd->src->addr32[0]; 8182 } 8183 pd->naf = pd2.naf = nk->af; 8184 pf_change_ap(&pd2, pd2.src, &th->th_sport, 8185 &nk->addr[pd2.sidx], nk->port[sidx]); 8186 pf_change_ap(&pd2, pd2.dst, &th->th_dport, 8187 &nk->addr[pd2.didx], nk->port[didx]); 8188 m_copyback(pd2.m, pd2.off, 8, (c_caddr_t)th); 8189 return (PF_AFRT); 8190 } 8191 #endif /* INET && INET6 */ 8192 8193 if (PF_ANEQ(pd2.src, 8194 &nk->addr[pd2.sidx], pd2.af) || 8195 nk->port[pd2.sidx] != th->th_sport) 8196 pf_change_icmp(pd2.src, &th->th_sport, 8197 daddr, &nk->addr[pd2.sidx], 8198 nk->port[pd2.sidx], NULL, 8199 pd2.ip_sum, icmpsum, 8200 pd->ip_sum, 0, pd2.af); 8201 8202 if (PF_ANEQ(pd2.dst, 8203 &nk->addr[pd2.didx], pd2.af) || 8204 nk->port[pd2.didx] != th->th_dport) 8205 pf_change_icmp(pd2.dst, &th->th_dport, 8206 saddr, &nk->addr[pd2.didx], 8207 nk->port[pd2.didx], NULL, 8208 pd2.ip_sum, icmpsum, 8209 pd->ip_sum, 0, pd2.af); 8210 copyback = 1; 8211 } 8212 8213 if (copyback) { 8214 switch (pd2.af) { 8215 #ifdef INET 8216 case AF_INET: 8217 m_copyback(pd->m, pd->off, ICMP_MINLEN, 8218 (caddr_t )&pd->hdr.icmp); 8219 m_copyback(pd->m, ipoff2, sizeof(h2), 8220 (caddr_t )&h2); 8221 break; 8222 #endif /* INET */ 8223 #ifdef INET6 8224 case AF_INET6: 8225 m_copyback(pd->m, pd->off, 8226 sizeof(struct icmp6_hdr), 8227 (caddr_t )&pd->hdr.icmp6); 8228 m_copyback(pd->m, ipoff2, sizeof(h2_6), 8229 (caddr_t )&h2_6); 8230 break; 8231 #endif /* INET6 */ 8232 default: 8233 unhandled_af(pd->af); 8234 } 8235 m_copyback(pd->m, pd2.off, 8, (caddr_t)th); 8236 } 8237 8238 return (PF_PASS); 8239 break; 8240 } 8241 case IPPROTO_UDP: { 8242 struct udphdr *uh = &pd2.hdr.udp; 8243 int action; 8244 8245 if (!pf_pull_hdr(pd->m, pd2.off, uh, sizeof(*uh), 8246 NULL, reason, pd2.af)) { 8247 DPFPRINTF(PF_DEBUG_MISC, 8248 "pf: ICMP error message too short " 8249 "(udp)"); 8250 return (PF_DROP); 8251 } 8252 pd2.pcksum = &pd2.hdr.udp.uh_sum; 8253 8254 key.af = pd2.af; 8255 key.proto = IPPROTO_UDP; 
8256 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 8257 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 8258 key.port[pd2.sidx] = uh->uh_sport; 8259 key.port[pd2.didx] = uh->uh_dport; 8260 8261 action = pf_find_state(&pd2, &key, state); 8262 if (action != PF_MATCH) 8263 return (action); 8264 8265 /* translate source/destination address, if necessary */ 8266 if ((*state)->key[PF_SK_WIRE] != 8267 (*state)->key[PF_SK_STACK]) { 8268 struct pf_state_key *nk; 8269 8270 if (PF_REVERSED_KEY(*state, pd->af)) 8271 nk = (*state)->key[pd->sidx]; 8272 else 8273 nk = (*state)->key[pd->didx]; 8274 8275 #if defined(INET) && defined(INET6) 8276 int afto, sidx, didx; 8277 8278 afto = pd->af != nk->af; 8279 8280 if (afto && (*state)->direction == PF_IN) { 8281 sidx = pd2.didx; 8282 didx = pd2.sidx; 8283 } else { 8284 sidx = pd2.sidx; 8285 didx = pd2.didx; 8286 } 8287 8288 if (afto) { 8289 if (pf_translate_icmp_af(nk->af, 8290 &pd->hdr.icmp)) 8291 return (PF_DROP); 8292 m_copyback(pd->m, pd->off, 8293 sizeof(struct icmp6_hdr), 8294 (c_caddr_t)&pd->hdr.icmp6); 8295 if (pf_change_icmp_af(pd->m, ipoff2, pd, 8296 &pd2, &nk->addr[sidx], 8297 &nk->addr[didx], pd->af, 8298 nk->af)) 8299 return (PF_DROP); 8300 pf_addrcpy(&pd->nsaddr, 8301 &nk->addr[pd2.sidx], nk->af); 8302 pf_addrcpy(&pd->ndaddr, 8303 &nk->addr[pd2.didx], nk->af); 8304 if (nk->af == AF_INET) { 8305 pd->proto = IPPROTO_ICMP; 8306 } else { 8307 pd->proto = IPPROTO_ICMPV6; 8308 /* 8309 * IPv4 becomes IPv6 so we must 8310 * copy IPv4 src addr to least 8311 * 32bits in IPv6 address to 8312 * keep traceroute/icmp 8313 * working. 
8314 */ 8315 pd->nsaddr.addr32[3] = 8316 pd->src->addr32[0]; 8317 } 8318 pd->naf = pd2.naf = nk->af; 8319 pf_change_ap(&pd2, pd2.src, &uh->uh_sport, 8320 &nk->addr[pd2.sidx], nk->port[sidx]); 8321 pf_change_ap(&pd2, pd2.dst, &uh->uh_dport, 8322 &nk->addr[pd2.didx], nk->port[didx]); 8323 m_copyback(pd2.m, pd2.off, sizeof(*uh), 8324 (c_caddr_t)uh); 8325 return (PF_AFRT); 8326 } 8327 #endif /* INET && INET6 */ 8328 8329 if (PF_ANEQ(pd2.src, 8330 &nk->addr[pd2.sidx], pd2.af) || 8331 nk->port[pd2.sidx] != uh->uh_sport) 8332 pf_change_icmp(pd2.src, &uh->uh_sport, 8333 daddr, &nk->addr[pd2.sidx], 8334 nk->port[pd2.sidx], &uh->uh_sum, 8335 pd2.ip_sum, icmpsum, 8336 pd->ip_sum, 1, pd2.af); 8337 8338 if (PF_ANEQ(pd2.dst, 8339 &nk->addr[pd2.didx], pd2.af) || 8340 nk->port[pd2.didx] != uh->uh_dport) 8341 pf_change_icmp(pd2.dst, &uh->uh_dport, 8342 saddr, &nk->addr[pd2.didx], 8343 nk->port[pd2.didx], &uh->uh_sum, 8344 pd2.ip_sum, icmpsum, 8345 pd->ip_sum, 1, pd2.af); 8346 8347 switch (pd2.af) { 8348 #ifdef INET 8349 case AF_INET: 8350 m_copyback(pd->m, pd->off, ICMP_MINLEN, 8351 (caddr_t )&pd->hdr.icmp); 8352 m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2); 8353 break; 8354 #endif /* INET */ 8355 #ifdef INET6 8356 case AF_INET6: 8357 m_copyback(pd->m, pd->off, 8358 sizeof(struct icmp6_hdr), 8359 (caddr_t )&pd->hdr.icmp6); 8360 m_copyback(pd->m, ipoff2, sizeof(h2_6), 8361 (caddr_t )&h2_6); 8362 break; 8363 #endif /* INET6 */ 8364 } 8365 m_copyback(pd->m, pd2.off, sizeof(*uh), (caddr_t)uh); 8366 } 8367 return (PF_PASS); 8368 break; 8369 } 8370 #ifdef INET 8371 case IPPROTO_SCTP: { 8372 struct sctphdr *sh = &pd2.hdr.sctp; 8373 struct pf_state_peer *src; 8374 int copyback = 0; 8375 int action; 8376 8377 if (! 
pf_pull_hdr(pd->m, pd2.off, sh, sizeof(*sh), NULL, reason, 8378 pd2.af)) { 8379 DPFPRINTF(PF_DEBUG_MISC, 8380 "pf: ICMP error message too short " 8381 "(sctp)"); 8382 return (PF_DROP); 8383 } 8384 pd2.pcksum = &pd2.sctp_dummy_sum; 8385 8386 key.af = pd2.af; 8387 key.proto = IPPROTO_SCTP; 8388 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 8389 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 8390 key.port[pd2.sidx] = sh->src_port; 8391 key.port[pd2.didx] = sh->dest_port; 8392 8393 action = pf_find_state(&pd2, &key, state); 8394 if (action != PF_MATCH) 8395 return (action); 8396 8397 if (pd->dir == (*state)->direction) { 8398 if (PF_REVERSED_KEY(*state, pd->af)) 8399 src = &(*state)->src; 8400 else 8401 src = &(*state)->dst; 8402 } else { 8403 if (PF_REVERSED_KEY(*state, pd->af)) 8404 src = &(*state)->dst; 8405 else 8406 src = &(*state)->src; 8407 } 8408 8409 if (src->scrub->pfss_v_tag != sh->v_tag) { 8410 DPFPRINTF(PF_DEBUG_MISC, 8411 "pf: ICMP error message has incorrect " 8412 "SCTP v_tag"); 8413 return (PF_DROP); 8414 } 8415 8416 /* translate source/destination address, if necessary */ 8417 if ((*state)->key[PF_SK_WIRE] != 8418 (*state)->key[PF_SK_STACK]) { 8419 8420 struct pf_state_key *nk; 8421 8422 if (PF_REVERSED_KEY(*state, pd->af)) 8423 nk = (*state)->key[pd->sidx]; 8424 else 8425 nk = (*state)->key[pd->didx]; 8426 8427 #if defined(INET) && defined(INET6) 8428 int afto, sidx, didx; 8429 8430 afto = pd->af != nk->af; 8431 8432 if (afto && (*state)->direction == PF_IN) { 8433 sidx = pd2.didx; 8434 didx = pd2.sidx; 8435 } else { 8436 sidx = pd2.sidx; 8437 didx = pd2.didx; 8438 } 8439 8440 if (afto) { 8441 if (pf_translate_icmp_af(nk->af, 8442 &pd->hdr.icmp)) 8443 return (PF_DROP); 8444 m_copyback(pd->m, pd->off, 8445 sizeof(struct icmp6_hdr), 8446 (c_caddr_t)&pd->hdr.icmp6); 8447 if (pf_change_icmp_af(pd->m, ipoff2, pd, 8448 &pd2, &nk->addr[sidx], 8449 &nk->addr[didx], pd->af, 8450 nk->af)) 8451 return (PF_DROP); 8452 sh->src_port = nk->port[sidx]; 8453 
sh->dest_port = nk->port[didx]; 8454 m_copyback(pd2.m, pd2.off, sizeof(*sh), (c_caddr_t)sh); 8455 pf_addrcpy(&pd->nsaddr, 8456 &nk->addr[pd2.sidx], nk->af); 8457 pf_addrcpy(&pd->ndaddr, 8458 &nk->addr[pd2.didx], nk->af); 8459 if (nk->af == AF_INET) { 8460 pd->proto = IPPROTO_ICMP; 8461 } else { 8462 pd->proto = IPPROTO_ICMPV6; 8463 /* 8464 * IPv4 becomes IPv6 so we must 8465 * copy IPv4 src addr to least 8466 * 32bits in IPv6 address to 8467 * keep traceroute/icmp 8468 * working. 8469 */ 8470 pd->nsaddr.addr32[3] = 8471 pd->src->addr32[0]; 8472 } 8473 pd->naf = nk->af; 8474 return (PF_AFRT); 8475 } 8476 #endif /* INET && INET6 */ 8477 8478 if (PF_ANEQ(pd2.src, 8479 &nk->addr[pd2.sidx], pd2.af) || 8480 nk->port[pd2.sidx] != sh->src_port) 8481 pf_change_icmp(pd2.src, &sh->src_port, 8482 daddr, &nk->addr[pd2.sidx], 8483 nk->port[pd2.sidx], NULL, 8484 pd2.ip_sum, icmpsum, 8485 pd->ip_sum, 0, pd2.af); 8486 8487 if (PF_ANEQ(pd2.dst, 8488 &nk->addr[pd2.didx], pd2.af) || 8489 nk->port[pd2.didx] != sh->dest_port) 8490 pf_change_icmp(pd2.dst, &sh->dest_port, 8491 saddr, &nk->addr[pd2.didx], 8492 nk->port[pd2.didx], NULL, 8493 pd2.ip_sum, icmpsum, 8494 pd->ip_sum, 0, pd2.af); 8495 copyback = 1; 8496 } 8497 8498 if (copyback) { 8499 switch (pd2.af) { 8500 #ifdef INET 8501 case AF_INET: 8502 m_copyback(pd->m, pd->off, ICMP_MINLEN, 8503 (caddr_t )&pd->hdr.icmp); 8504 m_copyback(pd->m, ipoff2, sizeof(h2), 8505 (caddr_t )&h2); 8506 break; 8507 #endif /* INET */ 8508 #ifdef INET6 8509 case AF_INET6: 8510 m_copyback(pd->m, pd->off, 8511 sizeof(struct icmp6_hdr), 8512 (caddr_t )&pd->hdr.icmp6); 8513 m_copyback(pd->m, ipoff2, sizeof(h2_6), 8514 (caddr_t )&h2_6); 8515 break; 8516 #endif /* INET6 */ 8517 } 8518 m_copyback(pd->m, pd2.off, sizeof(*sh), (caddr_t)sh); 8519 } 8520 8521 return (PF_PASS); 8522 break; 8523 } 8524 case IPPROTO_ICMP: { 8525 struct icmp *iih = &pd2.hdr.icmp; 8526 8527 if (pd2.af != AF_INET) { 8528 REASON_SET(reason, PFRES_NORM); 8529 return (PF_DROP); 8530 } 8531 
8532 if (!pf_pull_hdr(pd->m, pd2.off, iih, ICMP_MINLEN, 8533 NULL, reason, pd2.af)) { 8534 DPFPRINTF(PF_DEBUG_MISC, 8535 "pf: ICMP error message too short i" 8536 "(icmp)"); 8537 return (PF_DROP); 8538 } 8539 pd2.pcksum = &pd2.hdr.icmp.icmp_cksum; 8540 8541 icmpid = iih->icmp_id; 8542 pf_icmp_mapping(&pd2, iih->icmp_type, 8543 &icmp_dir, &virtual_id, &virtual_type); 8544 8545 ret = pf_icmp_state_lookup(&key, &pd2, state, 8546 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 8547 if (ret >= 0) { 8548 MPASS(*state == NULL); 8549 return (ret); 8550 } 8551 8552 /* translate source/destination address, if necessary */ 8553 if ((*state)->key[PF_SK_WIRE] != 8554 (*state)->key[PF_SK_STACK]) { 8555 struct pf_state_key *nk; 8556 8557 if (PF_REVERSED_KEY(*state, pd->af)) 8558 nk = (*state)->key[pd->sidx]; 8559 else 8560 nk = (*state)->key[pd->didx]; 8561 8562 #if defined(INET) && defined(INET6) 8563 int afto, sidx, didx; 8564 8565 afto = pd->af != nk->af; 8566 8567 if (afto && (*state)->direction == PF_IN) { 8568 sidx = pd2.didx; 8569 didx = pd2.sidx; 8570 iidx = !iidx; 8571 } else { 8572 sidx = pd2.sidx; 8573 didx = pd2.didx; 8574 } 8575 8576 if (afto) { 8577 if (nk->af != AF_INET6) 8578 return (PF_DROP); 8579 if (pf_translate_icmp_af(nk->af, 8580 &pd->hdr.icmp)) 8581 return (PF_DROP); 8582 m_copyback(pd->m, pd->off, 8583 sizeof(struct icmp6_hdr), 8584 (c_caddr_t)&pd->hdr.icmp6); 8585 if (pf_change_icmp_af(pd->m, ipoff2, pd, 8586 &pd2, &nk->addr[sidx], 8587 &nk->addr[didx], pd->af, 8588 nk->af)) 8589 return (PF_DROP); 8590 pd->proto = IPPROTO_ICMPV6; 8591 if (pf_translate_icmp_af(nk->af, iih)) 8592 return (PF_DROP); 8593 if (virtual_type == htons(ICMP_ECHO) && 8594 nk->port[iidx] != iih->icmp_id) 8595 iih->icmp_id = nk->port[iidx]; 8596 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 8597 (c_caddr_t)iih); 8598 pf_addrcpy(&pd->nsaddr, 8599 &nk->addr[pd2.sidx], nk->af); 8600 pf_addrcpy(&pd->ndaddr, 8601 &nk->addr[pd2.didx], nk->af); 8602 /* 8603 * IPv4 becomes IPv6 so we must copy 
8604 * IPv4 src addr to least 32bits in 8605 * IPv6 address to keep traceroute 8606 * working. 8607 */ 8608 pd->nsaddr.addr32[3] = 8609 pd->src->addr32[0]; 8610 pd->naf = nk->af; 8611 return (PF_AFRT); 8612 } 8613 #endif /* INET && INET6 */ 8614 8615 if (PF_ANEQ(pd2.src, 8616 &nk->addr[pd2.sidx], pd2.af) || 8617 (virtual_type == htons(ICMP_ECHO) && 8618 nk->port[iidx] != iih->icmp_id)) 8619 pf_change_icmp(pd2.src, 8620 (virtual_type == htons(ICMP_ECHO)) ? 8621 &iih->icmp_id : NULL, 8622 daddr, &nk->addr[pd2.sidx], 8623 (virtual_type == htons(ICMP_ECHO)) ? 8624 nk->port[iidx] : 0, NULL, 8625 pd2.ip_sum, icmpsum, 8626 pd->ip_sum, 0, AF_INET); 8627 8628 if (PF_ANEQ(pd2.dst, 8629 &nk->addr[pd2.didx], pd2.af)) 8630 pf_change_icmp(pd2.dst, NULL, NULL, 8631 &nk->addr[pd2.didx], 0, NULL, 8632 pd2.ip_sum, icmpsum, pd->ip_sum, 0, 8633 AF_INET); 8634 8635 m_copyback(pd->m, pd->off, ICMP_MINLEN, (caddr_t)&pd->hdr.icmp); 8636 m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2); 8637 m_copyback(pd->m, pd2.off, ICMP_MINLEN, (caddr_t)iih); 8638 } 8639 return (PF_PASS); 8640 break; 8641 } 8642 #endif /* INET */ 8643 #ifdef INET6 8644 case IPPROTO_ICMPV6: { 8645 struct icmp6_hdr *iih = &pd2.hdr.icmp6; 8646 8647 if (pd2.af != AF_INET6) { 8648 REASON_SET(reason, PFRES_NORM); 8649 return (PF_DROP); 8650 } 8651 8652 if (!pf_pull_hdr(pd->m, pd2.off, iih, 8653 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 8654 DPFPRINTF(PF_DEBUG_MISC, 8655 "pf: ICMP error message too short " 8656 "(icmp6)"); 8657 return (PF_DROP); 8658 } 8659 pd2.pcksum = &pd2.hdr.icmp6.icmp6_cksum; 8660 8661 pf_icmp_mapping(&pd2, iih->icmp6_type, 8662 &icmp_dir, &virtual_id, &virtual_type); 8663 8664 ret = pf_icmp_state_lookup(&key, &pd2, state, 8665 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 8666 /* IPv6? 
try matching a multicast address */ 8667 if (ret == PF_DROP && pd2.af == AF_INET6 && 8668 icmp_dir == PF_OUT) { 8669 MPASS(*state == NULL); 8670 ret = pf_icmp_state_lookup(&key, &pd2, 8671 state, virtual_id, virtual_type, 8672 icmp_dir, &iidx, 1, 1); 8673 } 8674 if (ret >= 0) { 8675 MPASS(*state == NULL); 8676 return (ret); 8677 } 8678 8679 /* translate source/destination address, if necessary */ 8680 if ((*state)->key[PF_SK_WIRE] != 8681 (*state)->key[PF_SK_STACK]) { 8682 struct pf_state_key *nk; 8683 8684 if (PF_REVERSED_KEY(*state, pd->af)) 8685 nk = (*state)->key[pd->sidx]; 8686 else 8687 nk = (*state)->key[pd->didx]; 8688 8689 #if defined(INET) && defined(INET6) 8690 int afto, sidx, didx; 8691 8692 afto = pd->af != nk->af; 8693 8694 if (afto && (*state)->direction == PF_IN) { 8695 sidx = pd2.didx; 8696 didx = pd2.sidx; 8697 iidx = !iidx; 8698 } else { 8699 sidx = pd2.sidx; 8700 didx = pd2.didx; 8701 } 8702 8703 if (afto) { 8704 if (nk->af != AF_INET) 8705 return (PF_DROP); 8706 if (pf_translate_icmp_af(nk->af, 8707 &pd->hdr.icmp)) 8708 return (PF_DROP); 8709 m_copyback(pd->m, pd->off, 8710 sizeof(struct icmp6_hdr), 8711 (c_caddr_t)&pd->hdr.icmp6); 8712 if (pf_change_icmp_af(pd->m, ipoff2, pd, 8713 &pd2, &nk->addr[sidx], 8714 &nk->addr[didx], pd->af, 8715 nk->af)) 8716 return (PF_DROP); 8717 pd->proto = IPPROTO_ICMP; 8718 if (pf_translate_icmp_af(nk->af, iih)) 8719 return (PF_DROP); 8720 if (virtual_type == 8721 htons(ICMP6_ECHO_REQUEST) && 8722 nk->port[iidx] != iih->icmp6_id) 8723 iih->icmp6_id = nk->port[iidx]; 8724 m_copyback(pd2.m, pd2.off, 8725 sizeof(struct icmp6_hdr), (c_caddr_t)iih); 8726 pf_addrcpy(&pd->nsaddr, 8727 &nk->addr[pd2.sidx], nk->af); 8728 pf_addrcpy(&pd->ndaddr, 8729 &nk->addr[pd2.didx], nk->af); 8730 pd->naf = nk->af; 8731 return (PF_AFRT); 8732 } 8733 #endif /* INET && INET6 */ 8734 8735 if (PF_ANEQ(pd2.src, 8736 &nk->addr[pd2.sidx], pd2.af) || 8737 ((virtual_type == htons(ICMP6_ECHO_REQUEST)) && 8738 nk->port[pd2.sidx] != 
iih->icmp6_id)) 8739 pf_change_icmp(pd2.src, 8740 (virtual_type == htons(ICMP6_ECHO_REQUEST)) 8741 ? &iih->icmp6_id : NULL, 8742 daddr, &nk->addr[pd2.sidx], 8743 (virtual_type == htons(ICMP6_ECHO_REQUEST)) 8744 ? nk->port[iidx] : 0, NULL, 8745 pd2.ip_sum, icmpsum, 8746 pd->ip_sum, 0, AF_INET6); 8747 8748 if (PF_ANEQ(pd2.dst, 8749 &nk->addr[pd2.didx], pd2.af)) 8750 pf_change_icmp(pd2.dst, NULL, NULL, 8751 &nk->addr[pd2.didx], 0, NULL, 8752 pd2.ip_sum, icmpsum, 8753 pd->ip_sum, 0, AF_INET6); 8754 8755 m_copyback(pd->m, pd->off, sizeof(struct icmp6_hdr), 8756 (caddr_t)&pd->hdr.icmp6); 8757 m_copyback(pd->m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); 8758 m_copyback(pd->m, pd2.off, sizeof(struct icmp6_hdr), 8759 (caddr_t)iih); 8760 } 8761 return (PF_PASS); 8762 break; 8763 } 8764 #endif /* INET6 */ 8765 default: { 8766 int action; 8767 8768 key.af = pd2.af; 8769 key.proto = pd2.proto; 8770 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 8771 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 8772 key.port[0] = key.port[1] = 0; 8773 8774 action = pf_find_state(&pd2, &key, state); 8775 if (action != PF_MATCH) 8776 return (action); 8777 8778 /* translate source/destination address, if necessary */ 8779 if ((*state)->key[PF_SK_WIRE] != 8780 (*state)->key[PF_SK_STACK]) { 8781 struct pf_state_key *nk = 8782 (*state)->key[pd->didx]; 8783 8784 if (PF_ANEQ(pd2.src, 8785 &nk->addr[pd2.sidx], pd2.af)) 8786 pf_change_icmp(pd2.src, NULL, daddr, 8787 &nk->addr[pd2.sidx], 0, NULL, 8788 pd2.ip_sum, icmpsum, 8789 pd->ip_sum, 0, pd2.af); 8790 8791 if (PF_ANEQ(pd2.dst, 8792 &nk->addr[pd2.didx], pd2.af)) 8793 pf_change_icmp(pd2.dst, NULL, saddr, 8794 &nk->addr[pd2.didx], 0, NULL, 8795 pd2.ip_sum, icmpsum, 8796 pd->ip_sum, 0, pd2.af); 8797 8798 switch (pd2.af) { 8799 #ifdef INET 8800 case AF_INET: 8801 m_copyback(pd->m, pd->off, ICMP_MINLEN, 8802 (caddr_t)&pd->hdr.icmp); 8803 m_copyback(pd->m, ipoff2, sizeof(h2), (caddr_t)&h2); 8804 break; 8805 #endif /* INET */ 8806 #ifdef INET6 8807 case 
AF_INET6: 8808 m_copyback(pd->m, pd->off, 8809 sizeof(struct icmp6_hdr), 8810 (caddr_t )&pd->hdr.icmp6); 8811 m_copyback(pd->m, ipoff2, sizeof(h2_6), 8812 (caddr_t )&h2_6); 8813 break; 8814 #endif /* INET6 */ 8815 } 8816 } 8817 return (PF_PASS); 8818 break; 8819 } 8820 } 8821 } 8822 } 8823 8824 /* 8825 * ipoff and off are measured from the start of the mbuf chain. 8826 * h must be at "ipoff" on the mbuf chain. 8827 */ 8828 void * 8829 pf_pull_hdr(const struct mbuf *m, int off, void *p, int len, 8830 u_short *actionp, u_short *reasonp, sa_family_t af) 8831 { 8832 int iplen = 0; 8833 switch (af) { 8834 #ifdef INET 8835 case AF_INET: { 8836 const struct ip *h = mtod(m, struct ip *); 8837 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 8838 8839 if (fragoff) { 8840 if (fragoff >= len) 8841 ACTION_SET(actionp, PF_PASS); 8842 else { 8843 ACTION_SET(actionp, PF_DROP); 8844 REASON_SET(reasonp, PFRES_FRAG); 8845 } 8846 return (NULL); 8847 } 8848 iplen = ntohs(h->ip_len); 8849 break; 8850 } 8851 #endif /* INET */ 8852 #ifdef INET6 8853 case AF_INET6: { 8854 const struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 8855 8856 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 8857 break; 8858 } 8859 #endif /* INET6 */ 8860 } 8861 if (m->m_pkthdr.len < off + len || iplen < off + len) { 8862 ACTION_SET(actionp, PF_DROP); 8863 REASON_SET(reasonp, PFRES_SHORT); 8864 return (NULL); 8865 } 8866 m_copydata(m, off, len, p); 8867 return (p); 8868 } 8869 8870 int 8871 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kkif *kif, 8872 int rtableid) 8873 { 8874 struct ifnet *ifp; 8875 8876 /* 8877 * Skip check for addresses with embedded interface scope, 8878 * as they would always match anyway. 
8879 */ 8880 if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6)) 8881 return (1); 8882 8883 if (af != AF_INET && af != AF_INET6) 8884 return (0); 8885 8886 if (kif == V_pfi_all) 8887 return (1); 8888 8889 /* Skip checks for ipsec interfaces */ 8890 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 8891 return (1); 8892 8893 ifp = (kif != NULL) ? kif->pfik_ifp : NULL; 8894 8895 switch (af) { 8896 #ifdef INET6 8897 case AF_INET6: 8898 return (fib6_check_urpf(rtableid, &addr->v6, 0, NHR_NONE, 8899 ifp)); 8900 #endif /* INET6 */ 8901 #ifdef INET 8902 case AF_INET: 8903 return (fib4_check_urpf(rtableid, addr->v4, 0, NHR_NONE, 8904 ifp)); 8905 #endif /* INET */ 8906 } 8907 8908 return (0); 8909 } 8910 8911 #ifdef INET 8912 static void 8913 pf_route(struct pf_krule *r, struct ifnet *oifp, 8914 struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp) 8915 { 8916 struct mbuf *m0, *m1, *md; 8917 struct route ro; 8918 const struct sockaddr *gw = &ro.ro_dst; 8919 struct sockaddr_in *dst; 8920 struct ip *ip; 8921 struct ifnet *ifp = NULL; 8922 int error = 0; 8923 uint16_t ip_len, ip_off; 8924 uint16_t tmp; 8925 int r_dir; 8926 bool skip_test = false; 8927 8928 KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__)); 8929 8930 SDT_PROBE4(pf, ip, route_to, entry, pd->m, pd, s, oifp); 8931 8932 if (s) { 8933 r_dir = s->direction; 8934 } else { 8935 r_dir = r->direction; 8936 } 8937 8938 KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT || 8939 r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction", 8940 __func__)); 8941 8942 if ((pd->pf_mtag == NULL && 8943 ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) || 8944 pd->pf_mtag->routed++ > 3) { 8945 m0 = pd->m; 8946 pd->m = NULL; 8947 SDT_PROBE1(pf, ip, route_to, drop, __LINE__); 8948 goto bad_locked; 8949 } 8950 8951 if (pd->act.rt_kif != NULL) 8952 ifp = pd->act.rt_kif->pfik_ifp; 8953 8954 if (pd->act.rt == PF_DUPTO) { 8955 if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) { 8956 if (s != NULL) { 8957 
PF_STATE_UNLOCK(s);
			}
			if (ifp == oifp) {
				/* When the 2nd interface is not skipped */
				return;
			} else {
				m0 = pd->m;
				pd->m = NULL;
				SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
				goto bad;
			}
		} else {
			/*
			 * Not duplicated yet: flag the packet and route a
			 * private copy.  If the copy cannot be allocated the
			 * duplicate is silently skipped.
			 */
			pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
			if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) {
				if (s)
					PF_STATE_UNLOCK(s);
				return;
			}
		}
	} else {
		/*
		 * True for reply-to packets travelling in the rule/state
		 * direction and for route-to packets travelling against it,
		 * i.e. when the action does not apply to this packet.
		 * Without address family translation only dummynet is run;
		 * with it, processing continues below.
		 */
		if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
			if (pd->af == pd->naf) {
				pf_dummynet(pd, s, r, &pd->m);
				if (s)
					PF_STATE_UNLOCK(s);
				return;
			} else {
				if (r_dir == PF_IN) {
					skip_test = true;
				}
			}
		}

		/*
		 * If we're actually doing route-to and af-to and are in the
		 * reply direction.
		 */
		if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp &&
		    pd->af != pd->naf) {
			if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET) {
				/* Un-set ifp so we do a plain route lookup. */
				ifp = NULL;
			}
			if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET6) {
				/* Un-set ifp so we do a plain route lookup. */
				ifp = NULL;
			}
		}
		m0 = pd->m;
	}

	ip = mtod(m0, struct ip *);

	/* Default next hop: the address recorded in pd->act.rt_addr. */
	bzero(&ro, sizeof(ro));
	dst = (struct sockaddr_in *)&ro.ro_dst;
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(struct sockaddr_in);
	dst->sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;

	/*
	 * Inbound packets get their TTL decremented here; a packet whose
	 * TTL would run out is dropped, with an ICMP time exceeded reply
	 * unless the rule is dup-to.
	 */
	if (pd->dir == PF_IN) {
		if (ip->ip_ttl <= IPTTLDEC) {
			if (r->rt != PF_DUPTO)
				pf_send_icmp(m0, ICMP_TIMXCEED,
				    ICMP_TIMXCEED_INTRANS, 0, pd->af, r,
				    pd->act.rtableid);
			goto bad_locked;
		}
		ip->ip_ttl -= IPTTLDEC;
	}

	if (s != NULL) {
		if (ifp == NULL && (pd->af != pd->naf)) {
			/* We're in the AFTO case. Do a route lookup. */
			const struct nhop_object *nh;
			nh = fib4_lookup(M_GETFIB(m0), ip->ip_dst, 0, NHR_NONE, 0);
			if (nh) {
				ifp = nh->nh_ifp;

				/* Use the gateway if needed. */
				if (nh->nh_flags & NHF_GATEWAY) {
					gw = &nh->gw_sa;
					ro.ro_flags |= RT_HAS_GW;
				} else {
					dst->sin_addr = ip->ip_dst;
				}

				/*
				 * Bind to the correct interface if we're
				 * if-bound. We don't know which interface
				 * that will be until here, so we've inserted
				 * the state on V_pf_all. Fix that now.
				 */
				if (s->kif == V_pfi_all && ifp != NULL &&
				    r->rule_flag & PFRULE_IFBOUND)
					s->kif = ifp->if_pf_kif;
			}
		}

		if (r->rule_flag & PFRULE_IFBOUND &&
		    pd->act.rt == PF_REPLYTO &&
		    s->kif == V_pfi_all) {
			s->kif = pd->act.rt_kif;
			s->orig_kif = oifp->if_pf_kif;
		}

		/* Last use of the state under its lock in this function. */
		PF_STATE_UNLOCK(s);
	}

	/* No output interface could be determined: drop pd->m. */
	if (ifp == NULL) {
		m0 = pd->m;
		pd->m = NULL;
		SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
		goto bad;
	}

	if (r->rt == PF_DUPTO)
		skip_test = true;

	/*
	 * A packet that came in on another interface is run through the
	 * outbound rules of the new interface.  pf_test() may hand back a
	 * different mbuf or take the packet (m0 == NULL), so the header
	 * pointer is re-read afterwards.
	 */
	if (pd->dir == PF_IN && !skip_test) {
		if (pf_test(AF_INET, PF_OUT, PFIL_FWD, ifp, &m0, inp,
		    &pd->act) != PF_PASS) {
			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
			goto bad;
		} else if (m0 == NULL) {
			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
			goto done;
		}
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    "%s: m0->m_len < sizeof(struct ip)", __func__);
			SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	if (ifp->if_flags & IFF_LOOPBACK)
		m0->m_flags |= M_SKIP_FIREWALL;

	ip_len = ntohs(ip->ip_len);
	ip_off = ntohs(ip->ip_off);

	/*
	 * Copied from FreeBSD 10.0-CURRENT ip_output.
	 * Checksums the interface cannot compute (flag requested but not
	 * present in if_hwassist) are finished in software here.
	 */
	m0->m_pkthdr.csum_flags |= CSUM_IP;
	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
		in_delayed_cksum(m0);
		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	}
	if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
		pf_sctp_checksum(m0, (uint32_t)(ip->ip_hl << 2));
		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
	}

	if (pd->dir == PF_IN) {
		/*
		 * Make sure dummynet gets the correct direction, in case it
		 * needs to re-inject later.
		 */
		pd->dir = PF_OUT;

		/*
		 * The following processing is actually the rest of the
		 * inbound processing, even though we've marked it as outbound
		 * (so we don't look through dummynet) and it happens after
		 * the outbound processing (pf_test(PF_OUT) above).
		 * Swap the dummynet pipe numbers, because it's going to come
		 * to the wrong conclusion about what direction it's
		 * processing, and we can't fix it or it will re-inject
		 * incorrectly. Swapping the pipe numbers means that its
		 * incorrect decision will pick the right pipe, and everything
		 * will mostly work as expected.
		 */
		tmp = pd->act.dnrpipe;
		pd->act.dnrpipe = pd->act.dnpipe;
		pd->act.dnpipe = tmp;
	}

	/*
	 * If small enough for interface, or the interface will take
	 * care of the fragmentation for us, we can just send directly.
	 */
	if (ip_len <= ifp->if_mtu ||
	    (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
		ip->ip_sum = 0;
		if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
			m0->m_pkthdr.csum_flags &= ~CSUM_IP;
		}
		m_clrprotoflags(m0);	/* Avoid confusing lower layers. */

		/*
		 * pf_dummynet_route() sets md to NULL when the packet was
		 * taken (or freed); only a remaining mbuf is transmitted.
		 */
		md = m0;
		error = pf_dummynet_route(pd, s, r, ifp, gw, &md);
		if (md != NULL) {
			error = (*ifp->if_output)(ifp, md, gw, &ro);
			SDT_PROBE2(pf, ip, route_to, output, ifp, error);
		}
		goto done;
	}

	/* Balk when DF bit is set or the interface didn't support TSO. */
	if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
		error = EMSGSIZE;
		KMOD_IPSTAT_INC(ips_cantfrag);
		if (pd->act.rt != PF_DUPTO) {
			/*
			 * Undo NAT first so the ICMP error is built from the
			 * packet as the sender transmitted it.
			 */
			if (s && s->nat_rule != NULL) {
				MPASS(m0 == pd->m);
				PACKET_UNDO_NAT(pd,
				    (ip->ip_hl << 2) + (ip_off & IP_OFFMASK),
				    s);
			}

			pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
			    ifp->if_mtu, pd->af, r, pd->act.rtableid);
		}
		SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
		goto bad;
	}

	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
	if (error) {
		SDT_PROBE1(pf, ip, route_to, drop, __LINE__);
		goto bad;
	}

	/*
	 * Send the fragments, linked through m_nextpkt, one by one.  Once
	 * an error has been recorded the remaining fragments are freed
	 * instead of being sent.
	 */
	for (; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = NULL;
		if (error == 0) {
			m_clrprotoflags(m0);
			md = m0;
			pd->pf_mtag = pf_find_mtag(md);
			error = pf_dummynet_route(pd, s, r, ifp,
			    gw, &md);
			if (md != NULL) {
				error = (*ifp->if_output)(ifp, md, gw, &ro);
				SDT_PROBE2(pf, ip, route_to, output, ifp, error);
			}
		} else
			m_freem(m0);
	}

	if (error == 0)
		KMOD_IPSTAT_INC(ips_fragmented);

done:
	/* Except for dup-to, the packet no longer belongs to the caller. */
	if (pd->act.rt != PF_DUPTO)
		pd->m = NULL;
	return;

bad_locked:
	if (s)
		PF_STATE_UNLOCK(s);
bad:
	m_freem(m0);
	goto done;
}
#endif /* INET */

#ifdef INET6
/*
 * Forward an IPv6 packet as directed by a route-to, reply-to or dup-to
 * action (pd->act.rt).  The output interface comes from pd->act.rt_kif
 * or, for address-family translated states, from fib6_lookup().
 * Packets carrying a PACKET_TAG_PF_REASSEMBLED tag are passed to
 * pf_refragment6(); other packets larger than the interface MTU are
 * dropped and, unless the action is dup-to, answered with
 * ICMP6_PACKET_TOO_BIG.
 * Every exit path calls PF_STATE_UNLOCK(s) for a non-NULL "s", so the
 * caller presumably enters with the state locked.
 */
static void
pf_route6(struct pf_krule *r, struct ifnet *oifp,
    struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
{
	struct mbuf		*m0, *md;
	struct m_tag		*mtag;
	struct sockaddr_in6	 dst;
	struct ip6_hdr		*ip6;
	struct ifnet		*ifp = NULL;
	int			 r_dir;
bool			 skip_test = false;

	KASSERT(pd->m && r && oifp, ("%s: invalid parameters", __func__));

	SDT_PROBE4(pf, ip6, route_to, entry, pd->m, pd, s, oifp);

	/* Prefer the direction stored in the state over the rule's. */
	if (s) {
		r_dir = s->direction;
	} else {
		r_dir = r->direction;
	}

	KASSERT(pd->dir == PF_IN || pd->dir == PF_OUT ||
	    r_dir == PF_IN || r_dir == PF_OUT, ("%s: invalid direction",
	    __func__));

	/*
	 * Drop the packet when no pf mbuf tag can be obtained or when the
	 * tag's "routed" counter shows it has been through here too often.
	 */
	if ((pd->pf_mtag == NULL &&
	    ((pd->pf_mtag = pf_get_mtag(pd->m)) == NULL)) ||
	    pd->pf_mtag->routed++ > 3) {
		m0 = pd->m;
		pd->m = NULL;
		SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
		goto bad_locked;
	}

	if (pd->act.rt_kif != NULL)
		ifp = pd->act.rt_kif->pfik_ifp;

	if (pd->act.rt == PF_DUPTO) {
		if ((pd->pf_mtag->flags & PF_MTAG_FLAG_DUPLICATED)) {
			if (s != NULL) {
				PF_STATE_UNLOCK(s);
			}
			if (ifp == oifp) {
				/* When the 2nd interface is not skipped */
				return;
			} else {
				m0 = pd->m;
				pd->m = NULL;
				SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
				goto bad;
			}
		} else {
			/*
			 * Not duplicated yet: flag the packet and route a
			 * private copy.  If the copy cannot be allocated the
			 * duplicate is silently skipped.
			 */
			pd->pf_mtag->flags |= PF_MTAG_FLAG_DUPLICATED;
			if (((m0 = m_dup(pd->m, M_NOWAIT)) == NULL)) {
				if (s)
					PF_STATE_UNLOCK(s);
				return;
			}
		}
	} else {
		/*
		 * True for reply-to packets travelling in the rule/state
		 * direction and for route-to packets travelling against it,
		 * i.e. when the action does not apply to this packet.
		 * Without address family translation only dummynet is run;
		 * with it, processing continues below.
		 */
		if ((pd->act.rt == PF_REPLYTO) == (r_dir == pd->dir)) {
			if (pd->af == pd->naf) {
				pf_dummynet(pd, s, r, &pd->m);
				if (s)
					PF_STATE_UNLOCK(s);
				return;
			} else {
				if (r_dir == PF_IN) {
					skip_test = true;
				}
			}
		}

		/*
		 * If we're actually doing route-to and af-to and are in the
		 * reply direction.
		 */
		if (pd->act.rt_kif && pd->act.rt_kif->pfik_ifp &&
		    pd->af != pd->naf) {
			if (pd->act.rt == PF_ROUTETO && r->naf != AF_INET6) {
				/* Un-set ifp so we do a plain route lookup. */
				ifp = NULL;
			}
			if (pd->act.rt == PF_REPLYTO && r->naf != AF_INET) {
				/* Un-set ifp so we do a plain route lookup. */
				ifp = NULL;
			}
		}
		m0 = pd->m;
	}

	ip6 = mtod(m0, struct ip6_hdr *);

	/* Default next hop: the address recorded in pd->act.rt_addr. */
	bzero(&dst, sizeof(dst));
	dst.sin6_family = AF_INET6;
	dst.sin6_len = sizeof(dst);
	pf_addrcpy((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr,
	    AF_INET6);

	/*
	 * Inbound packets get their hop limit decremented here; a packet
	 * whose hop limit would run out is dropped, with an ICMPv6 time
	 * exceeded reply unless the rule is dup-to.
	 */
	if (pd->dir == PF_IN) {
		if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
			if (r->rt != PF_DUPTO)
				pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
				    ICMP6_TIME_EXCEED_TRANSIT, 0, pd->af, r,
				    pd->act.rtableid);
			goto bad_locked;
		}
		ip6->ip6_hlim -= IPV6_HLIMDEC;
	}

	if (s != NULL) {
		/* Address family translation without an interface: look it up. */
		if (ifp == NULL && (pd->af != pd->naf)) {
			const struct nhop_object *nh;
			nh = fib6_lookup(M_GETFIB(m0), &ip6->ip6_dst, 0, NHR_NONE, 0);
			if (nh) {
				ifp = nh->nh_ifp;

				/* Use the gateway if needed. */
				if (nh->nh_flags & NHF_GATEWAY)
					bcopy(&nh->gw6_sa.sin6_addr, &dst.sin6_addr,
					    sizeof(dst.sin6_addr));
				else
					dst.sin6_addr = ip6->ip6_dst;

				/*
				 * Bind to the correct interface if we're
				 * if-bound. We don't know which interface
				 * that will be until here, so we've inserted
				 * the state on V_pf_all. Fix that now.
				 */
				if (s->kif == V_pfi_all && ifp != NULL &&
				    r->rule_flag & PFRULE_IFBOUND)
					s->kif = ifp->if_pf_kif;
			}
		}

		if (r->rule_flag & PFRULE_IFBOUND &&
		    pd->act.rt == PF_REPLYTO &&
		    s->kif == V_pfi_all) {
			s->kif = pd->act.rt_kif;
			s->orig_kif = oifp->if_pf_kif;
		}

		/* Last use of the state under its lock in this function. */
		PF_STATE_UNLOCK(s);
	}

	/*
	 * A translated UDP packet whose checksum field is zero gets the
	 * field filled from in6_cksum_pseudo() and the header written back
	 * into the mbuf.
	 */
	if (pd->af != pd->naf) {
		struct udphdr *uh = &pd->hdr.udp;

		if (pd->proto == IPPROTO_UDP && uh->uh_sum == 0) {
			uh->uh_sum = in6_cksum_pseudo(ip6,
			    ntohs(uh->uh_ulen), IPPROTO_UDP, 0);
			m_copyback(m0, pd->off, sizeof(*uh), pd->hdr.any);
		}
	}

	/* No output interface could be determined: drop pd->m. */
	if (ifp == NULL) {
		m0 = pd->m;
		pd->m = NULL;
		SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
		goto bad;
	}

	if (r->rt == PF_DUPTO)
		skip_test = true;

	/*
	 * A packet that came in on another interface is run through the
	 * outbound rules of the new interface.  pf_test() may hand back a
	 * different mbuf or take the packet (m0 == NULL), so the header
	 * pointer is re-read afterwards.
	 */
	if (pd->dir == PF_IN && !skip_test) {
		if (pf_test(AF_INET6, PF_OUT, PFIL_FWD | PF_PFIL_NOREFRAGMENT,
		    ifp, &m0, inp, &pd->act) != PF_PASS) {
			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
			goto bad;
		} else if (m0 == NULL) {
			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
			goto done;
		}
		if (m0->m_len < sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    "%s: m0->m_len < sizeof(struct ip6_hdr)",
			    __func__);
			SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
			goto bad;
		}
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	if (ifp->if_flags & IFF_LOOPBACK)
		m0->m_flags |= M_SKIP_FIREWALL;

	/* Finish a delayed checksum the interface cannot compute itself. */
	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
	    ~ifp->if_hwassist) {
		uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
		in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
	}

	if (pd->dir == PF_IN) {
		uint16_t	tmp;
		/*
		 * Make sure dummynet gets the correct direction, in case it
		 * needs to re-inject later.
		 */
		pd->dir = PF_OUT;

		/*
		 * The following processing is actually the rest of the
		 * inbound processing, even though we've marked it as outbound
		 * (so we don't look through dummynet) and it happens after
		 * the outbound processing (pf_test(PF_OUT) above).
		 * Swap the dummynet pipe numbers, because it's going to come
		 * to the wrong conclusion about what direction it's
		 * processing, and we can't fix it or it will re-inject
		 * incorrectly. Swapping the pipe numbers means that its
		 * incorrect decision will pick the right pipe, and everything
		 * will mostly work as expected.
		 */
		tmp = pd->act.dnrpipe;
		pd->act.dnrpipe = pd->act.dnpipe;
		pd->act.dnpipe = tmp;
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	/* Scope-embedded next hop: store the output interface index in it. */
	if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	/* Packets tagged as reassembled by pf are handed to pf_refragment6(). */
	mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL);
	if (mtag != NULL) {
		int ret __sdt_used;
		ret = pf_refragment6(ifp, &m0, mtag, ifp, true);
		SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
		goto done;
	}

	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
		/*
		 * pf_dummynet_route() sets md to NULL when the packet was
		 * taken (or freed); only a remaining mbuf is transmitted.
		 */
		md = m0;
		pf_dummynet_route(pd, s, r, ifp, sintosa(&dst), &md);
		if (md != NULL) {
			int ret __sdt_used;
			ret = nd6_output_ifp(ifp, ifp, md, &dst, NULL);
			SDT_PROBE2(pf, ip6, route_to, output, ifp, ret);
		}
	}
	else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (pd->act.rt != PF_DUPTO) {
			/*
			 * Undo NAT first so the ICMPv6 error is built from the
			 * packet as the sender transmitted it.
			 */
			if (s && s->nat_rule != NULL) {
				MPASS(m0 == pd->m);
				PACKET_UNDO_NAT(pd,
				    ((caddr_t)ip6 - m0->m_data) +
				    sizeof(struct ip6_hdr), s);
			}

			if (r->rt != PF_DUPTO)
				pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
				    ifp->if_mtu, pd->af, r, pd->act.rtableid);
		}
		SDT_PROBE1(pf, ip6, route_to, drop, __LINE__);
		goto bad;
	}

done:
	/* Except for dup-to, the packet no longer belongs to the caller. */
	if (pd->act.rt != PF_DUPTO)
pd->m = NULL; 9475 return; 9476 9477 bad_locked: 9478 if (s) 9479 PF_STATE_UNLOCK(s); 9480 bad: 9481 m_freem(m0); 9482 goto done; 9483 } 9484 #endif /* INET6 */ 9485 9486 /* 9487 * FreeBSD supports cksum offloads for the following drivers. 9488 * em(4), fxp(4), lge(4), nge(4), re(4), ti(4), txp(4), xl(4) 9489 * 9490 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR : 9491 * network driver performed cksum including pseudo header, need to verify 9492 * csum_data 9493 * CSUM_DATA_VALID : 9494 * network driver performed cksum, needs to additional pseudo header 9495 * cksum computation with partial csum_data(i.e. lack of H/W support for 9496 * pseudo header, for instance sk(4) and possibly gem(4)) 9497 * 9498 * After validating the cksum of packet, set both flag CSUM_DATA_VALID and 9499 * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper 9500 * TCP/UDP layer. 9501 * Also, set csum_data to 0xffff to force cksum validation. 9502 */ 9503 static int 9504 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af) 9505 { 9506 u_int16_t sum = 0; 9507 int hw_assist = 0; 9508 struct ip *ip; 9509 9510 if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) 9511 return (1); 9512 if (m->m_pkthdr.len < off + len) 9513 return (1); 9514 9515 switch (p) { 9516 case IPPROTO_TCP: 9517 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 9518 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { 9519 sum = m->m_pkthdr.csum_data; 9520 } else { 9521 ip = mtod(m, struct ip *); 9522 sum = in_pseudo(ip->ip_src.s_addr, 9523 ip->ip_dst.s_addr, htonl((u_short)len + 9524 m->m_pkthdr.csum_data + IPPROTO_TCP)); 9525 } 9526 sum ^= 0xffff; 9527 ++hw_assist; 9528 } 9529 break; 9530 case IPPROTO_UDP: 9531 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 9532 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { 9533 sum = m->m_pkthdr.csum_data; 9534 } else { 9535 ip = mtod(m, struct ip *); 9536 sum = in_pseudo(ip->ip_src.s_addr, 9537 ip->ip_dst.s_addr, htonl((u_short)len + 9538 
m->m_pkthdr.csum_data + IPPROTO_UDP)); 9539 } 9540 sum ^= 0xffff; 9541 ++hw_assist; 9542 } 9543 break; 9544 case IPPROTO_ICMP: 9545 #ifdef INET6 9546 case IPPROTO_ICMPV6: 9547 #endif /* INET6 */ 9548 break; 9549 default: 9550 return (1); 9551 } 9552 9553 if (!hw_assist) { 9554 switch (af) { 9555 case AF_INET: 9556 if (m->m_len < sizeof(struct ip)) 9557 return (1); 9558 sum = in4_cksum(m, (p == IPPROTO_ICMP ? 0 : p), off, len); 9559 break; 9560 #ifdef INET6 9561 case AF_INET6: 9562 if (m->m_len < sizeof(struct ip6_hdr)) 9563 return (1); 9564 sum = in6_cksum(m, p, off, len); 9565 break; 9566 #endif /* INET6 */ 9567 } 9568 } 9569 if (sum) { 9570 switch (p) { 9571 case IPPROTO_TCP: 9572 { 9573 KMOD_TCPSTAT_INC(tcps_rcvbadsum); 9574 break; 9575 } 9576 case IPPROTO_UDP: 9577 { 9578 KMOD_UDPSTAT_INC(udps_badsum); 9579 break; 9580 } 9581 #ifdef INET 9582 case IPPROTO_ICMP: 9583 { 9584 KMOD_ICMPSTAT_INC(icps_checksum); 9585 break; 9586 } 9587 #endif 9588 #ifdef INET6 9589 case IPPROTO_ICMPV6: 9590 { 9591 KMOD_ICMP6STAT_INC(icp6s_checksum); 9592 break; 9593 } 9594 #endif /* INET6 */ 9595 } 9596 return (1); 9597 } else { 9598 if (p == IPPROTO_TCP || p == IPPROTO_UDP) { 9599 m->m_pkthdr.csum_flags |= 9600 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 9601 m->m_pkthdr.csum_data = 0xffff; 9602 } 9603 } 9604 return (0); 9605 } 9606 9607 static bool 9608 pf_pdesc_to_dnflow(const struct pf_pdesc *pd, const struct pf_krule *r, 9609 const struct pf_kstate *s, struct ip_fw_args *dnflow) 9610 { 9611 int dndir = r->direction; 9612 9613 if (s && dndir == PF_INOUT) { 9614 dndir = s->direction; 9615 } else if (dndir == PF_INOUT) { 9616 /* Assume primary direction. Happens when we've set dnpipe in 9617 * the ethernet level code. 
*/ 9618 dndir = pd->dir; 9619 } 9620 9621 if (pd->pf_mtag->flags & PF_MTAG_FLAG_DUMMYNETED) 9622 return (false); 9623 9624 memset(dnflow, 0, sizeof(*dnflow)); 9625 9626 if (pd->dport != NULL) 9627 dnflow->f_id.dst_port = ntohs(*pd->dport); 9628 if (pd->sport != NULL) 9629 dnflow->f_id.src_port = ntohs(*pd->sport); 9630 9631 if (pd->dir == PF_IN) 9632 dnflow->flags |= IPFW_ARGS_IN; 9633 else 9634 dnflow->flags |= IPFW_ARGS_OUT; 9635 9636 if (pd->dir != dndir && pd->act.dnrpipe) { 9637 dnflow->rule.info = pd->act.dnrpipe; 9638 } 9639 else if (pd->dir == dndir && pd->act.dnpipe) { 9640 dnflow->rule.info = pd->act.dnpipe; 9641 } 9642 else { 9643 return (false); 9644 } 9645 9646 dnflow->rule.info |= IPFW_IS_DUMMYNET; 9647 if (r->free_flags & PFRULE_DN_IS_PIPE || pd->act.flags & PFSTATE_DN_IS_PIPE) 9648 dnflow->rule.info |= IPFW_IS_PIPE; 9649 9650 dnflow->f_id.proto = pd->proto; 9651 dnflow->f_id.extra = dnflow->rule.info; 9652 switch (pd->naf) { 9653 case AF_INET: 9654 dnflow->f_id.addr_type = 4; 9655 dnflow->f_id.src_ip = ntohl(pd->src->v4.s_addr); 9656 dnflow->f_id.dst_ip = ntohl(pd->dst->v4.s_addr); 9657 break; 9658 case AF_INET6: 9659 dnflow->flags |= IPFW_ARGS_IP6; 9660 dnflow->f_id.addr_type = 6; 9661 dnflow->f_id.src_ip6 = pd->src->v6; 9662 dnflow->f_id.dst_ip6 = pd->dst->v6; 9663 break; 9664 } 9665 9666 return (true); 9667 } 9668 9669 int 9670 pf_test_eth(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, 9671 struct inpcb *inp) 9672 { 9673 struct pfi_kkif *kif; 9674 struct mbuf *m = *m0; 9675 9676 M_ASSERTPKTHDR(m); 9677 MPASS(ifp->if_vnet == curvnet); 9678 NET_EPOCH_ASSERT(); 9679 9680 if (!V_pf_status.running) 9681 return (PF_PASS); 9682 9683 kif = (struct pfi_kkif *)ifp->if_pf_kif; 9684 9685 if (kif == NULL) { 9686 DPFPRINTF(PF_DEBUG_URGENT, 9687 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); 9688 return (PF_DROP); 9689 } 9690 if (kif->pfik_flags & PFI_IFLAG_SKIP) 9691 return (PF_PASS); 9692 9693 if (m->m_flags & M_SKIP_FIREWALL) 9694 
return (PF_PASS); 9695 9696 if (__predict_false(! M_WRITABLE(*m0))) { 9697 m = *m0 = m_unshare(*m0, M_NOWAIT); 9698 if (*m0 == NULL) 9699 return (PF_DROP); 9700 } 9701 9702 /* Stateless! */ 9703 return (pf_test_eth_rule(dir, kif, m0)); 9704 } 9705 9706 static __inline void 9707 pf_dummynet_flag_remove(struct mbuf *m, struct pf_mtag *pf_mtag) 9708 { 9709 struct m_tag *mtag; 9710 9711 pf_mtag->flags &= ~PF_MTAG_FLAG_DUMMYNET; 9712 9713 /* dummynet adds this tag, but pf does not need it, 9714 * and keeping it creates unexpected behavior, 9715 * e.g. in case of divert(4) usage right after dummynet. */ 9716 mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); 9717 if (mtag != NULL) 9718 m_tag_delete(m, mtag); 9719 } 9720 9721 static int 9722 pf_dummynet(struct pf_pdesc *pd, struct pf_kstate *s, 9723 struct pf_krule *r, struct mbuf **m0) 9724 { 9725 return (pf_dummynet_route(pd, s, r, NULL, NULL, m0)); 9726 } 9727 9728 static int 9729 pf_dummynet_route(struct pf_pdesc *pd, struct pf_kstate *s, 9730 struct pf_krule *r, struct ifnet *ifp, const struct sockaddr *sa, 9731 struct mbuf **m0) 9732 { 9733 struct ip_fw_args dnflow; 9734 9735 NET_EPOCH_ASSERT(); 9736 9737 if (pd->act.dnpipe == 0 && pd->act.dnrpipe == 0) 9738 return (0); 9739 9740 if (ip_dn_io_ptr == NULL) { 9741 m_freem(*m0); 9742 *m0 = NULL; 9743 return (ENOMEM); 9744 } 9745 9746 if (pd->pf_mtag == NULL && 9747 ((pd->pf_mtag = pf_get_mtag(*m0)) == NULL)) { 9748 m_freem(*m0); 9749 *m0 = NULL; 9750 return (ENOMEM); 9751 } 9752 9753 if (ifp != NULL) { 9754 pd->pf_mtag->flags |= PF_MTAG_FLAG_ROUTE_TO; 9755 9756 pd->pf_mtag->if_index = ifp->if_index; 9757 pd->pf_mtag->if_idxgen = ifp->if_idxgen; 9758 9759 MPASS(sa != NULL); 9760 9761 switch (sa->sa_family) { 9762 case AF_INET: 9763 memcpy(&pd->pf_mtag->dst, sa, 9764 sizeof(struct sockaddr_in)); 9765 break; 9766 case AF_INET6: 9767 memcpy(&pd->pf_mtag->dst, sa, 9768 sizeof(struct sockaddr_in6)); 9769 break; 9770 } 9771 } 9772 9773 if (s != NULL && s->nat_rule != NULL && 
9774 s->nat_rule->action == PF_RDR && 9775 ( 9776 #ifdef INET 9777 (pd->af == AF_INET && IN_LOOPBACK(ntohl(pd->dst->v4.s_addr))) || 9778 #endif /* INET */ 9779 (pd->af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd->dst->v6)))) { 9780 /* 9781 * If we're redirecting to loopback mark this packet 9782 * as being local. Otherwise it might get dropped 9783 * if dummynet re-injects. 9784 */ 9785 (*m0)->m_pkthdr.rcvif = V_loif; 9786 } 9787 9788 if (pf_pdesc_to_dnflow(pd, r, s, &dnflow)) { 9789 pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNET; 9790 pd->pf_mtag->flags |= PF_MTAG_FLAG_DUMMYNETED; 9791 ip_dn_io_ptr(m0, &dnflow); 9792 if (*m0 != NULL) { 9793 pd->pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; 9794 pf_dummynet_flag_remove(*m0, pd->pf_mtag); 9795 } 9796 } 9797 9798 return (0); 9799 } 9800 9801 static int 9802 pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end, 9803 u_short *reason) 9804 { 9805 uint8_t type, length, opts[15 * 4 - sizeof(struct ip)]; 9806 9807 /* IP header in payload of ICMP packet may be too short */ 9808 if (pd->m->m_pkthdr.len < end) { 9809 DPFPRINTF(PF_DEBUG_MISC, "IP option too short"); 9810 REASON_SET(reason, PFRES_SHORT); 9811 return (PF_DROP); 9812 } 9813 9814 MPASS(end - off <= sizeof(opts)); 9815 m_copydata(pd->m, off, end - off, opts); 9816 end -= off; 9817 off = 0; 9818 9819 while (off < end) { 9820 type = opts[off]; 9821 if (type == IPOPT_EOL) 9822 break; 9823 if (type == IPOPT_NOP) { 9824 off++; 9825 continue; 9826 } 9827 if (off + 2 > end) { 9828 DPFPRINTF(PF_DEBUG_MISC, "IP length opt"); 9829 REASON_SET(reason, PFRES_IPOPTIONS); 9830 return (PF_DROP); 9831 } 9832 length = opts[off + 1]; 9833 if (length < 2) { 9834 DPFPRINTF(PF_DEBUG_MISC, "IP short opt"); 9835 REASON_SET(reason, PFRES_IPOPTIONS); 9836 return (PF_DROP); 9837 } 9838 if (off + length > end) { 9839 DPFPRINTF(PF_DEBUG_MISC, "IP long opt"); 9840 REASON_SET(reason, PFRES_IPOPTIONS); 9841 return (PF_DROP); 9842 } 9843 switch (type) { 9844 case IPOPT_RA: 9845 pd->badopts |= 
PF_OPT_ROUTER_ALERT; 9846 break; 9847 default: 9848 pd->badopts |= PF_OPT_OTHER; 9849 break; 9850 } 9851 off += length; 9852 } 9853 9854 return (PF_PASS); 9855 } 9856 9857 static int 9858 pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) 9859 { 9860 struct ah ext; 9861 u_int32_t hlen, end; 9862 int hdr_cnt; 9863 9864 hlen = h->ip_hl << 2; 9865 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) { 9866 REASON_SET(reason, PFRES_SHORT); 9867 return (PF_DROP); 9868 } 9869 if (hlen != sizeof(struct ip)) { 9870 if (pf_walk_option(pd, h, pd->off + sizeof(struct ip), 9871 pd->off + hlen, reason) != PF_PASS) 9872 return (PF_DROP); 9873 /* header options which contain only padding is fishy */ 9874 if (pd->badopts == 0) 9875 pd->badopts |= PF_OPT_OTHER; 9876 } 9877 end = pd->off + ntohs(h->ip_len); 9878 pd->off += hlen; 9879 pd->proto = h->ip_p; 9880 /* IGMP packets have router alert options, allow them */ 9881 if (pd->proto == IPPROTO_IGMP) { 9882 /* According to RFC 1112 ttl must be set to 1. 
*/ 9883 if ((h->ip_ttl != 1) || 9884 !IN_MULTICAST(ntohl(h->ip_dst.s_addr))) { 9885 DPFPRINTF(PF_DEBUG_MISC, "Invalid IGMP"); 9886 REASON_SET(reason, PFRES_IPOPTIONS); 9887 return (PF_DROP); 9888 } 9889 pd->badopts &= ~PF_OPT_ROUTER_ALERT; 9890 } 9891 /* stop walking over non initial fragments */ 9892 if ((h->ip_off & htons(IP_OFFMASK)) != 0) 9893 return (PF_PASS); 9894 for (hdr_cnt = 0; hdr_cnt < PF_HDR_LIMIT; hdr_cnt++) { 9895 switch (pd->proto) { 9896 case IPPROTO_AH: 9897 /* fragments may be short */ 9898 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 && 9899 end < pd->off + sizeof(ext)) 9900 return (PF_PASS); 9901 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 9902 NULL, reason, AF_INET)) { 9903 DPFPRINTF(PF_DEBUG_MISC, "IP short exthdr"); 9904 return (PF_DROP); 9905 } 9906 pd->off += (ext.ah_len + 2) * 4; 9907 pd->proto = ext.ah_nxt; 9908 break; 9909 default: 9910 return (PF_PASS); 9911 } 9912 } 9913 DPFPRINTF(PF_DEBUG_MISC, "IPv4 nested authentication header limit"); 9914 REASON_SET(reason, PFRES_IPOPTIONS); 9915 return (PF_DROP); 9916 } 9917 9918 #ifdef INET6 9919 static int 9920 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, 9921 u_short *reason) 9922 { 9923 struct ip6_opt opt; 9924 struct ip6_opt_jumbo jumbo; 9925 9926 while (off < end) { 9927 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, 9928 sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { 9929 DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt type"); 9930 return (PF_DROP); 9931 } 9932 if (opt.ip6o_type == IP6OPT_PAD1) { 9933 off++; 9934 continue; 9935 } 9936 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), NULL, 9937 reason, AF_INET6)) { 9938 DPFPRINTF(PF_DEBUG_MISC, "IPv6 short opt"); 9939 return (PF_DROP); 9940 } 9941 if (off + sizeof(opt) + opt.ip6o_len > end) { 9942 DPFPRINTF(PF_DEBUG_MISC, "IPv6 long opt"); 9943 REASON_SET(reason, PFRES_IPOPTIONS); 9944 return (PF_DROP); 9945 } 9946 switch (opt.ip6o_type) { 9947 case IP6OPT_PADN: 9948 break; 9949 case IP6OPT_JUMBO: 9950 
pd->badopts |= PF_OPT_JUMBO;
			/* Only one jumbo payload option is accepted. */
			if (pd->jumbolen != 0) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple jumbo");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* With a jumbo option the header's plen must be 0. */
			if (ntohs(h->ip6_plen) != 0) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 bad jumbo plen");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), NULL,
			    reason, AF_INET6)) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbo");
				return (PF_DROP);
			}
			/* The length is copied out bytewise, then byte-swapped. */
			memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
			    sizeof(pd->jumbolen));
			pd->jumbolen = ntohl(pd->jumbolen);
			if (pd->jumbolen < IPV6_MAXPACKET) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 short jumbolen");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			break;
		case IP6OPT_ROUTER_ALERT:
			pd->badopts |= PF_OPT_ROUTER_ALERT;
			break;
		default:
			pd->badopts |= PF_OPT_OTHER;
			break;
		}
		off += sizeof(opt) + opt.ip6o_len;
	}

	return (PF_PASS);
}

/*
 * Walk the chain of IPv6 extension headers that follows "h".
 *
 * pd->off is expected to point at the IPv6 header on entry.  On PF_PASS
 * pd->off and pd->proto describe the header at which the walk stopped;
 * pd->fragoff, pd->extoff, pd->jumbolen and pd->badopts are filled in
 * along the way.  PF_DROP is returned, with the reason stored through
 * REASON_SET(), for malformed or truncated chains and when more than
 * PF_HDR_LIMIT headers are nested.
 */
int
pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
{
	struct ip6_frag		 frag;
	struct ip6_ext		 ext;
	struct icmp6_hdr	 icmp6;
	struct ip6_rthdr	 rthdr;
	uint32_t		 end;
	int			 hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;

	pd->off += sizeof(struct ip6_hdr);
	end = pd->off + ntohs(h->ip6_plen);
	pd->fragoff = pd->extoff = pd->jumbolen = 0;
	pd->proto = h->ip6_nxt;
	for (hdr_cnt = 0; hdr_cnt < PF_HDR_LIMIT; hdr_cnt++) {
		/*
		 * First pass over the current header: only record in
		 * pd->badopts which option-carrying headers are present.
		 * pd->off and pd->proto are advanced by the second switch.
		 */
		switch (pd->proto) {
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			pd->badopts |= PF_OPT_OTHER;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
				return (PF_DROP);
			}
			if (pf_walk_option6(pd, h, pd->off + sizeof(ext),
			    pd->off + (ext.ip6e_len + 1) * 8,
			    reason) != PF_PASS)
				return (PF_DROP);
			/* option header which contains only padding is fishy */
			if (pd->badopts == 0)
				pd->badopts |= PF_OPT_OTHER;
			break;
		}
		switch (pd->proto) {
		case IPPROTO_FRAGMENT:
			if (fraghdr_cnt++) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple fragment");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			/* jumbo payload packets cannot be fragmented */
			if (pd->jumbolen != 0) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 fragmented jumbo");
				REASON_SET(reason, PFRES_FRAG);
				return (PF_DROP);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 short fragment");
				return (PF_DROP);
			}
			/* stop walking over non initial fragments */
			if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
				pd->fragoff = pd->off;
				return (PF_PASS);
			}
			/* RFC6946:  reassemble only non atomic fragments */
			if (frag.ip6f_offlg & IP6F_MORE_FRAG)
				pd->fragoff = pd->off;
			pd->off += sizeof(frag);
			pd->proto = frag.ip6f_nxt;
			break;
		case IPPROTO_ROUTING:
			if (rthdr_cnt++) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 multiple rthdr");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* fragments may be short */
			if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 short rthdr");
				return (PF_DROP);
			}
			/* Type 0 routing headers are always rejected. */
			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 rthdr0");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* FALLTHROUGH */
		case IPPROTO_HOPOPTS:
			/* RFC2460 4.1:  Hop-by-Hop only after IPv6 header */
			if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 hopopts not first");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* FALLTHROUGH */
		case IPPROTO_AH:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 short exthdr");
				return (PF_DROP);
			}
			/* fragments may be short */
			if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			/* reassembly needs the ext header before the frag */
			if (pd->fragoff == 0)
				pd->extoff = pd->off;
			/*
			 * NOTE(review): this fires when a jumbo length *was*
			 * parsed (pd->jumbolen != 0) although the message
			 * says "missing jumbo" - confirm the intended test.
			 */
			if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 &&
			    ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) {
				DPFPRINTF(PF_DEBUG_MISC, "IPv6 missing jumbo");
				REASON_SET(reason, PFRES_IPOPTIONS);
				return (PF_DROP);
			}
			/* AH counts its length in 4-byte units, the rest in 8. */
			if (pd->proto == IPPROTO_AH)
				pd->off += (ext.ip6e_len + 2) * 4;
			else
				pd->off += (ext.ip6e_len + 1) * 8;
			pd->proto = ext.ip6e_nxt;
			break;
		case IPPROTO_ICMPV6:
			/* fragments may be short, ignore inner header then */
			if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
				return (PF_PASS);
			}
			if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6),
			    NULL, reason, AF_INET6)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    "IPv6 short icmp6hdr");
				return (PF_DROP);
			}
			/* ICMP multicast packets have router alert options */
			switch (icmp6.icmp6_type) {
			case MLD_LISTENER_QUERY:
			case MLD_LISTENER_REPORT:
			case MLD_LISTENER_DONE:
			case MLDV2_LISTENER_REPORT:
				/*
				 * According to RFC 2710 all MLD messages are
				 * sent with hop-limit (ttl) set to 1, and link
				 * local source address.  If either one is
				 * missing then MLD message is invalid and
				 * should be discarded.
				 */
				if ((h->ip6_hlim != 1) ||
				    !IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) {
					DPFPRINTF(PF_DEBUG_MISC, "Invalid MLD");
					REASON_SET(reason, PFRES_IPOPTIONS);
					return (PF_DROP);
				}
				pd->badopts &= ~PF_OPT_ROUTER_ALERT;
				break;
			}
			return (PF_PASS);
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_SCTP:
			/* fragments may be short, ignore inner header then */
			if (pd->fragoff != 0 && end < pd->off +
			    (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
			    pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
			    pd->proto == IPPROTO_SCTP ? sizeof(struct sctphdr) :
			    sizeof(struct icmp6_hdr))) {
				pd->off = pd->fragoff;
				pd->proto = IPPROTO_FRAGMENT;
			}
			/* FALLTHROUGH */
		default:
			return (PF_PASS);
		}
	}
	DPFPRINTF(PF_DEBUG_MISC, "IPv6 nested extension header limit");
	REASON_SET(reason, PFRES_IPOPTIONS);
	return (PF_DROP);
}
#endif /* INET6 */

/*
 * Zero the packet descriptor "pd", then attach the mbuf "m" and the pf
 * mbuf tag returned by pf_find_mtag() for it.
 */
static void
pf_init_pdesc(struct pf_pdesc *pd, struct mbuf *m)
{
	memset(pd, 0, sizeof(*pd));
	pd->pf_mtag = pf_find_mtag(m);
	pd->m = m;
}

static int
pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
    u_short *action, u_short *reason, struct pfi_kkif *kif,
    struct pf_rule_actions *default_actions)
{
	pd->dir = dir;
	pd->kif = kif;
	pd->m = *m0;
	/* Source/destination key indices depend on the direction. */
	pd->sidx = (dir == PF_IN) ? 0 : 1;
	pd->didx = (dir == PF_IN) ?
1 : 0; 10188 pd->af = pd->naf = af; 10189 10190 PF_RULES_ASSERT(); 10191 10192 TAILQ_INIT(&pd->sctp_multihome_jobs); 10193 if (default_actions != NULL) 10194 memcpy(&pd->act, default_actions, sizeof(pd->act)); 10195 10196 if (pd->pf_mtag && pd->pf_mtag->dnpipe) { 10197 pd->act.dnpipe = pd->pf_mtag->dnpipe; 10198 pd->act.flags = pd->pf_mtag->dnflags; 10199 } 10200 10201 switch (af) { 10202 #ifdef INET 10203 case AF_INET: { 10204 struct ip *h; 10205 10206 if (__predict_false((*m0)->m_len < sizeof(struct ip)) && 10207 (pd->m = *m0 = m_pullup(*m0, sizeof(struct ip))) == NULL) { 10208 DPFPRINTF(PF_DEBUG_URGENT, 10209 "%s: m_len < sizeof(struct ip), pullup failed", 10210 __func__); 10211 *action = PF_DROP; 10212 REASON_SET(reason, PFRES_SHORT); 10213 return (-1); 10214 } 10215 10216 h = mtod(pd->m, struct ip *); 10217 if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { 10218 *action = PF_DROP; 10219 REASON_SET(reason, PFRES_SHORT); 10220 return (-1); 10221 } 10222 10223 if (pf_normalize_ip(reason, pd) != PF_PASS) { 10224 /* We do IP header normalization and packet reassembly here */ 10225 *m0 = pd->m; 10226 *action = PF_DROP; 10227 return (-1); 10228 } 10229 *m0 = pd->m; 10230 h = mtod(pd->m, struct ip *); 10231 10232 if (pf_walk_header(pd, h, reason) != PF_PASS) { 10233 *action = PF_DROP; 10234 return (-1); 10235 } 10236 10237 pd->src = (struct pf_addr *)&h->ip_src; 10238 pd->dst = (struct pf_addr *)&h->ip_dst; 10239 pf_addrcpy(&pd->osrc, pd->src, af); 10240 pf_addrcpy(&pd->odst, pd->dst, af); 10241 pd->ip_sum = &h->ip_sum; 10242 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; 10243 pd->ttl = h->ip_ttl; 10244 pd->tot_len = ntohs(h->ip_len); 10245 pd->act.rtableid = -1; 10246 pd->df = h->ip_off & htons(IP_DF); 10247 pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ? 
10248 PF_VPROTO_FRAGMENT : pd->proto; 10249 10250 break; 10251 } 10252 #endif /* INET */ 10253 #ifdef INET6 10254 case AF_INET6: { 10255 struct ip6_hdr *h; 10256 10257 if (__predict_false((*m0)->m_len < sizeof(struct ip6_hdr)) && 10258 (pd->m = *m0 = m_pullup(*m0, sizeof(struct ip6_hdr))) == NULL) { 10259 DPFPRINTF(PF_DEBUG_URGENT, 10260 "%s: m_len < sizeof(struct ip6_hdr)" 10261 ", pullup failed", __func__); 10262 *action = PF_DROP; 10263 REASON_SET(reason, PFRES_SHORT); 10264 return (-1); 10265 } 10266 10267 h = mtod(pd->m, struct ip6_hdr *); 10268 if (pd->m->m_pkthdr.len < 10269 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 10270 *action = PF_DROP; 10271 REASON_SET(reason, PFRES_SHORT); 10272 return (-1); 10273 } 10274 10275 /* 10276 * we do not support jumbogram. if we keep going, zero ip6_plen 10277 * will do something bad, so drop the packet for now. 10278 */ 10279 if (htons(h->ip6_plen) == 0) { 10280 *action = PF_DROP; 10281 return (-1); 10282 } 10283 10284 if (pf_walk_header6(pd, h, reason) != PF_PASS) { 10285 *action = PF_DROP; 10286 return (-1); 10287 } 10288 10289 h = mtod(pd->m, struct ip6_hdr *); 10290 pd->src = (struct pf_addr *)&h->ip6_src; 10291 pd->dst = (struct pf_addr *)&h->ip6_dst; 10292 pf_addrcpy(&pd->osrc, pd->src, af); 10293 pf_addrcpy(&pd->odst, pd->dst, af); 10294 pd->ip_sum = NULL; 10295 pd->tos = IPV6_DSCP(h); 10296 pd->ttl = h->ip6_hlim; 10297 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 10298 pd->act.rtableid = -1; 10299 10300 pd->virtual_proto = (pd->fragoff != 0) ? 10301 PF_VPROTO_FRAGMENT : pd->proto; 10302 10303 /* We do IP header normalization and packet reassembly here */ 10304 if (pf_normalize_ip6(pd->fragoff, reason, pd) != 10305 PF_PASS) { 10306 *m0 = pd->m; 10307 *action = PF_DROP; 10308 return (-1); 10309 } 10310 *m0 = pd->m; 10311 if (pd->m == NULL) { 10312 /* packet sits in reassembly queue, no error */ 10313 *action = PF_PASS; 10314 return (-1); 10315 } 10316 10317 /* Update pointers into the packet. 
*/ 10318 h = mtod(pd->m, struct ip6_hdr *); 10319 pd->src = (struct pf_addr *)&h->ip6_src; 10320 pd->dst = (struct pf_addr *)&h->ip6_dst; 10321 10322 pd->off = 0; 10323 10324 if (pf_walk_header6(pd, h, reason) != PF_PASS) { 10325 *action = PF_DROP; 10326 return (-1); 10327 } 10328 10329 if (m_tag_find(pd->m, PACKET_TAG_PF_REASSEMBLED, NULL) != NULL) { 10330 /* 10331 * Reassembly may have changed the next protocol from 10332 * fragment to something else, so update. 10333 */ 10334 pd->virtual_proto = pd->proto; 10335 MPASS(pd->fragoff == 0); 10336 } 10337 10338 if (pd->fragoff != 0) 10339 pd->virtual_proto = PF_VPROTO_FRAGMENT; 10340 10341 break; 10342 } 10343 #endif /* INET6 */ 10344 default: 10345 panic("pf_setup_pdesc called with illegal af %u", af); 10346 } 10347 10348 switch (pd->virtual_proto) { 10349 case IPPROTO_TCP: { 10350 struct tcphdr *th = &pd->hdr.tcp; 10351 10352 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), action, 10353 reason, af)) { 10354 *action = PF_DROP; 10355 REASON_SET(reason, PFRES_SHORT); 10356 return (-1); 10357 } 10358 pd->hdrlen = sizeof(*th); 10359 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 10360 pd->sport = &th->th_sport; 10361 pd->dport = &th->th_dport; 10362 pd->pcksum = &th->th_sum; 10363 break; 10364 } 10365 case IPPROTO_UDP: { 10366 struct udphdr *uh = &pd->hdr.udp; 10367 10368 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), action, 10369 reason, af)) { 10370 *action = PF_DROP; 10371 REASON_SET(reason, PFRES_SHORT); 10372 return (-1); 10373 } 10374 pd->hdrlen = sizeof(*uh); 10375 if (uh->uh_dport == 0 || 10376 ntohs(uh->uh_ulen) > pd->m->m_pkthdr.len - pd->off || 10377 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 10378 *action = PF_DROP; 10379 REASON_SET(reason, PFRES_SHORT); 10380 return (-1); 10381 } 10382 pd->sport = &uh->uh_sport; 10383 pd->dport = &uh->uh_dport; 10384 pd->pcksum = &uh->uh_sum; 10385 break; 10386 } 10387 case IPPROTO_SCTP: { 10388 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.sctp, 
sizeof(pd->hdr.sctp), 10389 action, reason, af)) { 10390 *action = PF_DROP; 10391 REASON_SET(reason, PFRES_SHORT); 10392 return (-1); 10393 } 10394 pd->hdrlen = sizeof(pd->hdr.sctp); 10395 pd->p_len = pd->tot_len - pd->off; 10396 10397 pd->sport = &pd->hdr.sctp.src_port; 10398 pd->dport = &pd->hdr.sctp.dest_port; 10399 if (pd->hdr.sctp.src_port == 0 || pd->hdr.sctp.dest_port == 0) { 10400 *action = PF_DROP; 10401 REASON_SET(reason, PFRES_SHORT); 10402 return (-1); 10403 } 10404 if (pf_scan_sctp(pd) != PF_PASS) { 10405 *action = PF_DROP; 10406 REASON_SET(reason, PFRES_SHORT); 10407 return (-1); 10408 } 10409 /* 10410 * Placeholder. The SCTP checksum is 32-bits, but 10411 * pf_test_state() expects to update a 16-bit checksum. 10412 * Provide a dummy value which we'll subsequently ignore. 10413 */ 10414 pd->pcksum = &pd->sctp_dummy_sum; 10415 break; 10416 } 10417 case IPPROTO_ICMP: { 10418 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, 10419 action, reason, af)) { 10420 *action = PF_DROP; 10421 REASON_SET(reason, PFRES_SHORT); 10422 return (-1); 10423 } 10424 pd->pcksum = &pd->hdr.icmp.icmp_cksum; 10425 pd->hdrlen = ICMP_MINLEN; 10426 break; 10427 } 10428 #ifdef INET6 10429 case IPPROTO_ICMPV6: { 10430 size_t icmp_hlen = sizeof(struct icmp6_hdr); 10431 10432 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 10433 action, reason, af)) { 10434 *action = PF_DROP; 10435 REASON_SET(reason, PFRES_SHORT); 10436 return (-1); 10437 } 10438 /* ICMP headers we look further into to match state */ 10439 switch (pd->hdr.icmp6.icmp6_type) { 10440 case MLD_LISTENER_QUERY: 10441 case MLD_LISTENER_REPORT: 10442 icmp_hlen = sizeof(struct mld_hdr); 10443 break; 10444 case ND_NEIGHBOR_SOLICIT: 10445 case ND_NEIGHBOR_ADVERT: 10446 icmp_hlen = sizeof(struct nd_neighbor_solicit); 10447 /* FALLTHROUGH */ 10448 case ND_ROUTER_SOLICIT: 10449 case ND_ROUTER_ADVERT: 10450 case ND_REDIRECT: 10451 if (pd->ttl != 255) { 10452 REASON_SET(reason, PFRES_NORM); 10453 return 
(PF_DROP); 10454 } 10455 break; 10456 } 10457 if (icmp_hlen > sizeof(struct icmp6_hdr) && 10458 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 10459 action, reason, af)) { 10460 *action = PF_DROP; 10461 REASON_SET(reason, PFRES_SHORT); 10462 return (-1); 10463 } 10464 pd->hdrlen = icmp_hlen; 10465 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum; 10466 break; 10467 } 10468 #endif /* INET6 */ 10469 } 10470 10471 if (pd->sport) 10472 pd->osport = pd->nsport = *pd->sport; 10473 if (pd->dport) 10474 pd->odport = pd->ndport = *pd->dport; 10475 10476 return (0); 10477 } 10478 10479 static void 10480 pf_counters_inc(int action, struct pf_pdesc *pd, 10481 struct pf_kstate *s, struct pf_krule *r, struct pf_krule *a) 10482 { 10483 struct pf_krule *tr; 10484 int dir = pd->dir; 10485 int dirndx; 10486 10487 pf_counter_u64_critical_enter(); 10488 pf_counter_u64_add_protected( 10489 &pd->kif->pfik_bytes[pd->af == AF_INET6][dir == PF_OUT][action != PF_PASS], 10490 pd->tot_len); 10491 pf_counter_u64_add_protected( 10492 &pd->kif->pfik_packets[pd->af == AF_INET6][dir == PF_OUT][action != PF_PASS], 10493 1); 10494 10495 if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) { 10496 dirndx = (dir == PF_OUT); 10497 pf_counter_u64_add_protected(&r->packets[dirndx], 1); 10498 pf_counter_u64_add_protected(&r->bytes[dirndx], pd->tot_len); 10499 pf_update_timestamp(r); 10500 10501 if (a != NULL) { 10502 pf_counter_u64_add_protected(&a->packets[dirndx], 1); 10503 pf_counter_u64_add_protected(&a->bytes[dirndx], pd->tot_len); 10504 } 10505 if (s != NULL) { 10506 struct pf_krule_item *ri; 10507 10508 if (s->nat_rule != NULL) { 10509 pf_counter_u64_add_protected(&s->nat_rule->packets[dirndx], 10510 1); 10511 pf_counter_u64_add_protected(&s->nat_rule->bytes[dirndx], 10512 pd->tot_len); 10513 } 10514 /* 10515 * Source nodes are accessed unlocked here. 
10516 * But since we are operating with stateful tracking 10517 * and the state is locked, those SNs could not have 10518 * been freed. 10519 */ 10520 for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) { 10521 if (s->sns[sn_type] != NULL) { 10522 counter_u64_add( 10523 s->sns[sn_type]->packets[dirndx], 10524 1); 10525 counter_u64_add( 10526 s->sns[sn_type]->bytes[dirndx], 10527 pd->tot_len); 10528 } 10529 } 10530 dirndx = (dir == s->direction) ? 0 : 1; 10531 s->packets[dirndx]++; 10532 s->bytes[dirndx] += pd->tot_len; 10533 10534 SLIST_FOREACH(ri, &s->match_rules, entry) { 10535 pf_counter_u64_add_protected(&ri->r->packets[dirndx], 1); 10536 pf_counter_u64_add_protected(&ri->r->bytes[dirndx], pd->tot_len); 10537 10538 if (ri->r->src.addr.type == PF_ADDR_TABLE) 10539 pfr_update_stats(ri->r->src.addr.p.tbl, 10540 (s == NULL) ? pd->src : 10541 &s->key[(s->direction == PF_IN)]-> 10542 addr[(s->direction == PF_OUT)], 10543 pd->af, pd->tot_len, dir == PF_OUT, 10544 r->action == PF_PASS, ri->r->src.neg); 10545 if (ri->r->dst.addr.type == PF_ADDR_TABLE) 10546 pfr_update_stats(ri->r->dst.addr.p.tbl, 10547 (s == NULL) ? pd->dst : 10548 &s->key[(s->direction == PF_IN)]-> 10549 addr[(s->direction == PF_IN)], 10550 pd->af, pd->tot_len, dir == PF_OUT, 10551 r->action == PF_PASS, ri->r->dst.neg); 10552 } 10553 } 10554 10555 tr = r; 10556 if (s != NULL && s->nat_rule != NULL && 10557 r == &V_pf_default_rule) 10558 tr = s->nat_rule; 10559 10560 if (tr->src.addr.type == PF_ADDR_TABLE) 10561 pfr_update_stats(tr->src.addr.p.tbl, 10562 (s == NULL) ? pd->src : 10563 &s->key[(s->direction == PF_IN)]-> 10564 addr[(s->direction == PF_OUT)], 10565 pd->af, pd->tot_len, dir == PF_OUT, 10566 r->action == PF_PASS, tr->src.neg); 10567 if (tr->dst.addr.type == PF_ADDR_TABLE) 10568 pfr_update_stats(tr->dst.addr.p.tbl, 10569 (s == NULL) ? 
pd->dst : 10570 &s->key[(s->direction == PF_IN)]-> 10571 addr[(s->direction == PF_IN)], 10572 pd->af, pd->tot_len, dir == PF_OUT, 10573 r->action == PF_PASS, tr->dst.neg); 10574 } 10575 pf_counter_u64_critical_exit(); 10576 } 10577 static void 10578 pf_log_matches(struct pf_pdesc *pd, struct pf_krule *rm, 10579 struct pf_krule *am, struct pf_kruleset *ruleset, 10580 struct pf_krule_slist *matchrules) 10581 { 10582 struct pf_krule_item *ri; 10583 10584 /* if this is the log(matches) rule, packet has been logged already */ 10585 if (rm->log & PF_LOG_MATCHES) 10586 return; 10587 10588 SLIST_FOREACH(ri, matchrules, entry) 10589 if (ri->r->log & PF_LOG_MATCHES) 10590 PFLOG_PACKET(rm->action, PFRES_MATCH, rm, am, 10591 ruleset, pd, 1, ri->r); 10592 } 10593 10594 #if defined(INET) || defined(INET6) 10595 int 10596 pf_test(sa_family_t af, int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, 10597 struct inpcb *inp, struct pf_rule_actions *default_actions) 10598 { 10599 struct pfi_kkif *kif; 10600 u_short action, reason = 0; 10601 struct m_tag *mtag; 10602 struct pf_krule *a = NULL, *r = &V_pf_default_rule; 10603 struct pf_kstate *s = NULL; 10604 struct pf_kruleset *ruleset = NULL; 10605 struct pf_pdesc pd; 10606 int use_2nd_queue = 0; 10607 uint16_t tag; 10608 10609 PF_RULES_RLOCK_TRACKER; 10610 KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: bad direction %d\n", __func__, dir)); 10611 M_ASSERTPKTHDR(*m0); 10612 NET_EPOCH_ASSERT(); 10613 10614 if (!V_pf_status.running) 10615 return (PF_PASS); 10616 10617 kif = (struct pfi_kkif *)ifp->if_pf_kif; 10618 10619 if (__predict_false(kif == NULL)) { 10620 DPFPRINTF(PF_DEBUG_URGENT, 10621 "%s: kif == NULL, if_xname %s", 10622 __func__, ifp->if_xname); 10623 return (PF_DROP); 10624 } 10625 if (kif->pfik_flags & PFI_IFLAG_SKIP) { 10626 return (PF_PASS); 10627 } 10628 10629 if ((*m0)->m_flags & M_SKIP_FIREWALL) { 10630 return (PF_PASS); 10631 } 10632 10633 if (__predict_false(! 
M_WRITABLE(*m0))) { 10634 *m0 = m_unshare(*m0, M_NOWAIT); 10635 if (*m0 == NULL) { 10636 return (PF_DROP); 10637 } 10638 } 10639 10640 pf_init_pdesc(&pd, *m0); 10641 10642 if (pd.pf_mtag != NULL && (pd.pf_mtag->flags & PF_MTAG_FLAG_ROUTE_TO)) { 10643 pd.pf_mtag->flags &= ~PF_MTAG_FLAG_ROUTE_TO; 10644 10645 ifp = ifnet_byindexgen(pd.pf_mtag->if_index, 10646 pd.pf_mtag->if_idxgen); 10647 if (ifp == NULL || ifp->if_flags & IFF_DYING) { 10648 m_freem(*m0); 10649 *m0 = NULL; 10650 return (PF_PASS); 10651 } 10652 (ifp->if_output)(ifp, *m0, sintosa(&pd.pf_mtag->dst), NULL); 10653 *m0 = NULL; 10654 return (PF_PASS); 10655 } 10656 10657 if (ip_dn_io_ptr != NULL && pd.pf_mtag != NULL && 10658 pd.pf_mtag->flags & PF_MTAG_FLAG_DUMMYNET) { 10659 /* Dummynet re-injects packets after they've 10660 * completed their delay. We've already 10661 * processed them, so pass unconditionally. */ 10662 10663 /* But only once. We may see the packet multiple times (e.g. 10664 * PFIL_IN/PFIL_OUT). */ 10665 pf_dummynet_flag_remove(pd.m, pd.pf_mtag); 10666 10667 return (PF_PASS); 10668 } 10669 10670 PF_RULES_RLOCK(); 10671 10672 if (pf_setup_pdesc(af, dir, &pd, m0, &action, &reason, 10673 kif, default_actions) == -1) { 10674 if (action != PF_PASS) 10675 pd.act.log |= PF_LOG_FORCE; 10676 goto done; 10677 } 10678 10679 #ifdef INET 10680 if (af == AF_INET && dir == PF_OUT && pflags & PFIL_FWD && 10681 pd.df && (*m0)->m_pkthdr.len > ifp->if_mtu) { 10682 PF_RULES_RUNLOCK(); 10683 icmp_error(*m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 10684 0, ifp->if_mtu); 10685 *m0 = NULL; 10686 return (PF_DROP); 10687 } 10688 #endif /* INET */ 10689 #ifdef INET6 10690 /* 10691 * If we end up changing IP addresses (e.g. binat) the stack may get 10692 * confused and fail to send the icmp6 packet too big error. Just send 10693 * it here, before we do any NAT. 
10694 */ 10695 if (af == AF_INET6 && dir == PF_OUT && pflags & PFIL_FWD && 10696 IN6_LINKMTU(ifp) < pf_max_frag_size(*m0)) { 10697 PF_RULES_RUNLOCK(); 10698 icmp6_error(*m0, ICMP6_PACKET_TOO_BIG, 0, IN6_LINKMTU(ifp)); 10699 *m0 = NULL; 10700 return (PF_DROP); 10701 } 10702 #endif /* INET6 */ 10703 10704 if (__predict_false(ip_divert_ptr != NULL) && 10705 ((mtag = m_tag_locate(pd.m, MTAG_PF_DIVERT, 0, NULL)) != NULL)) { 10706 struct pf_divert_mtag *dt = (struct pf_divert_mtag *)(mtag+1); 10707 if ((dt->idir == PF_DIVERT_MTAG_DIR_IN && dir == PF_IN) || 10708 (dt->idir == PF_DIVERT_MTAG_DIR_OUT && dir == PF_OUT)) { 10709 if (pd.pf_mtag == NULL && 10710 ((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) { 10711 action = PF_DROP; 10712 goto done; 10713 } 10714 pd.pf_mtag->flags |= PF_MTAG_FLAG_PACKET_LOOPED; 10715 } 10716 if (pd.pf_mtag && pd.pf_mtag->flags & PF_MTAG_FLAG_FASTFWD_OURS_PRESENT) { 10717 pd.m->m_flags |= M_FASTFWD_OURS; 10718 pd.pf_mtag->flags &= ~PF_MTAG_FLAG_FASTFWD_OURS_PRESENT; 10719 } 10720 m_tag_delete(pd.m, mtag); 10721 10722 mtag = m_tag_locate(pd.m, MTAG_IPFW_RULE, 0, NULL); 10723 if (mtag != NULL) 10724 m_tag_delete(pd.m, mtag); 10725 } 10726 10727 switch (pd.virtual_proto) { 10728 case PF_VPROTO_FRAGMENT: 10729 /* 10730 * handle fragments that aren't reassembled by 10731 * normalization 10732 */ 10733 if (kif == NULL || r == NULL) /* pflog */ 10734 action = PF_DROP; 10735 else 10736 action = pf_test_rule(&r, &s, &pd, &a, 10737 &ruleset, &reason, inp); 10738 if (action != PF_PASS) 10739 REASON_SET(&reason, PFRES_FRAG); 10740 break; 10741 10742 case IPPROTO_TCP: { 10743 /* Respond to SYN with a syncookie. 
*/ 10744 if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) == TH_SYN && 10745 pd.dir == PF_IN && pf_synflood_check(&pd)) { 10746 pf_syncookie_send(&pd); 10747 action = PF_DROP; 10748 break; 10749 } 10750 10751 if ((tcp_get_flags(&pd.hdr.tcp) & TH_ACK) && pd.p_len == 0) 10752 use_2nd_queue = 1; 10753 action = pf_normalize_tcp(&pd); 10754 if (action == PF_DROP) 10755 break; 10756 action = pf_test_state(&s, &pd, &reason); 10757 if (action == PF_PASS || action == PF_AFRT) { 10758 if (V_pfsync_update_state_ptr != NULL) 10759 V_pfsync_update_state_ptr(s); 10760 r = s->rule; 10761 a = s->anchor; 10762 } else if (s == NULL) { 10763 /* Validate remote SYN|ACK, re-create original SYN if 10764 * valid. */ 10765 if ((tcp_get_flags(&pd.hdr.tcp) & (TH_SYN|TH_ACK|TH_RST)) == 10766 TH_ACK && pf_syncookie_validate(&pd) && 10767 pd.dir == PF_IN) { 10768 struct mbuf *msyn; 10769 10770 msyn = pf_syncookie_recreate_syn(&pd); 10771 if (msyn == NULL) { 10772 action = PF_DROP; 10773 break; 10774 } 10775 10776 action = pf_test(af, dir, pflags, ifp, &msyn, inp, 10777 &pd.act); 10778 m_freem(msyn); 10779 if (action != PF_PASS) 10780 break; 10781 10782 action = pf_test_state(&s, &pd, &reason); 10783 if (action != PF_PASS || s == NULL) { 10784 action = PF_DROP; 10785 break; 10786 } 10787 10788 s->src.seqhi = ntohl(pd.hdr.tcp.th_ack) - 1; 10789 s->src.seqlo = ntohl(pd.hdr.tcp.th_seq) - 1; 10790 pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_DST); 10791 action = pf_synproxy(&pd, s, &reason); 10792 break; 10793 } else { 10794 action = pf_test_rule(&r, &s, &pd, 10795 &a, &ruleset, &reason, inp); 10796 } 10797 } 10798 break; 10799 } 10800 10801 case IPPROTO_SCTP: 10802 action = pf_normalize_sctp(&pd); 10803 if (action == PF_DROP) 10804 break; 10805 /* fallthrough */ 10806 case IPPROTO_UDP: 10807 default: 10808 action = pf_test_state(&s, &pd, &reason); 10809 if (action == PF_PASS || action == PF_AFRT) { 10810 if (V_pfsync_update_state_ptr != NULL) 10811 V_pfsync_update_state_ptr(s); 10812 r 
= s->rule; 10813 a = s->anchor; 10814 } else if (s == NULL) { 10815 action = pf_test_rule(&r, &s, 10816 &pd, &a, &ruleset, &reason, inp); 10817 } 10818 break; 10819 10820 case IPPROTO_ICMP: 10821 case IPPROTO_ICMPV6: { 10822 if (pd.virtual_proto == IPPROTO_ICMP && af != AF_INET) { 10823 action = PF_DROP; 10824 REASON_SET(&reason, PFRES_NORM); 10825 DPFPRINTF(PF_DEBUG_MISC, 10826 "dropping IPv6 packet with ICMPv4 payload"); 10827 break; 10828 } 10829 if (pd.virtual_proto == IPPROTO_ICMPV6 && af != AF_INET6) { 10830 action = PF_DROP; 10831 REASON_SET(&reason, PFRES_NORM); 10832 DPFPRINTF(PF_DEBUG_MISC, 10833 "pf: dropping IPv4 packet with ICMPv6 payload"); 10834 break; 10835 } 10836 action = pf_test_state_icmp(&s, &pd, &reason); 10837 if (action == PF_PASS || action == PF_AFRT) { 10838 if (V_pfsync_update_state_ptr != NULL) 10839 V_pfsync_update_state_ptr(s); 10840 r = s->rule; 10841 a = s->anchor; 10842 } else if (s == NULL) 10843 action = pf_test_rule(&r, &s, &pd, 10844 &a, &ruleset, &reason, inp); 10845 break; 10846 } 10847 10848 } 10849 10850 done: 10851 PF_RULES_RUNLOCK(); 10852 10853 if (pd.m == NULL) 10854 goto eat_pkt; 10855 10856 if (s) 10857 memcpy(&pd.act, &s->act, sizeof(s->act)); 10858 10859 if (action == PF_PASS && pd.badopts != 0 && !pd.act.allow_opts) { 10860 action = PF_DROP; 10861 REASON_SET(&reason, PFRES_IPOPTIONS); 10862 pd.act.log = PF_LOG_FORCE; 10863 DPFPRINTF(PF_DEBUG_MISC, 10864 "pf: dropping packet with dangerous headers"); 10865 } 10866 10867 if (pd.act.max_pkt_size && pd.act.max_pkt_size && 10868 pd.tot_len > pd.act.max_pkt_size) { 10869 action = PF_DROP; 10870 REASON_SET(&reason, PFRES_NORM); 10871 pd.act.log = PF_LOG_FORCE; 10872 DPFPRINTF(PF_DEBUG_MISC, 10873 "pf: dropping overly long packet"); 10874 } 10875 10876 if (s) { 10877 uint8_t log = pd.act.log; 10878 memcpy(&pd.act, &s->act, sizeof(struct pf_rule_actions)); 10879 pd.act.log |= log; 10880 tag = s->tag; 10881 } else { 10882 tag = r->tag; 10883 } 10884 10885 if (tag > 0 && 
pf_tag_packet(&pd, tag)) { 10886 action = PF_DROP; 10887 REASON_SET(&reason, PFRES_MEMORY); 10888 } 10889 10890 pf_scrub(&pd); 10891 if (pd.proto == IPPROTO_TCP && pd.act.max_mss) 10892 pf_normalize_mss(&pd); 10893 10894 if (pd.act.rtableid >= 0) 10895 M_SETFIB(pd.m, pd.act.rtableid); 10896 10897 if (pd.act.flags & PFSTATE_SETPRIO) { 10898 if (pd.tos & IPTOS_LOWDELAY) 10899 use_2nd_queue = 1; 10900 if (vlan_set_pcp(pd.m, pd.act.set_prio[use_2nd_queue])) { 10901 action = PF_DROP; 10902 REASON_SET(&reason, PFRES_MEMORY); 10903 pd.act.log = PF_LOG_FORCE; 10904 DPFPRINTF(PF_DEBUG_MISC, 10905 "pf: failed to allocate 802.1q mtag"); 10906 } 10907 } 10908 10909 #ifdef ALTQ 10910 if (action == PF_PASS && pd.act.qid) { 10911 if (pd.pf_mtag == NULL && 10912 ((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) { 10913 action = PF_DROP; 10914 REASON_SET(&reason, PFRES_MEMORY); 10915 } else { 10916 if (s != NULL) 10917 pd.pf_mtag->qid_hash = pf_state_hash(s); 10918 if (use_2nd_queue || (pd.tos & IPTOS_LOWDELAY)) 10919 pd.pf_mtag->qid = pd.act.pqid; 10920 else 10921 pd.pf_mtag->qid = pd.act.qid; 10922 /* Add hints for ecn. */ 10923 pd.pf_mtag->hdr = mtod(pd.m, void *); 10924 } 10925 } 10926 #endif /* ALTQ */ 10927 10928 /* 10929 * connections redirected to loopback should not match sockets 10930 * bound specifically to loopback due to security implications, 10931 * see tcp_input() and in_pcblookup_listen(). 
10932 */ 10933 if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || 10934 pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule != NULL && 10935 (s->nat_rule->action == PF_RDR || 10936 s->nat_rule->action == PF_BINAT) && 10937 pf_is_loopback(af, pd.dst)) 10938 pd.m->m_flags |= M_SKIP_FIREWALL; 10939 10940 if (af == AF_INET && __predict_false(ip_divert_ptr != NULL) && 10941 action == PF_PASS && r->divert.port && !PACKET_LOOPED(&pd)) { 10942 mtag = m_tag_alloc(MTAG_PF_DIVERT, 0, 10943 sizeof(struct pf_divert_mtag), M_NOWAIT | M_ZERO); 10944 if (mtag != NULL) { 10945 ((struct pf_divert_mtag *)(mtag+1))->port = 10946 ntohs(r->divert.port); 10947 ((struct pf_divert_mtag *)(mtag+1))->idir = 10948 (dir == PF_IN) ? PF_DIVERT_MTAG_DIR_IN : 10949 PF_DIVERT_MTAG_DIR_OUT; 10950 10951 if (s) 10952 PF_STATE_UNLOCK(s); 10953 10954 m_tag_prepend(pd.m, mtag); 10955 if (pd.m->m_flags & M_FASTFWD_OURS) { 10956 if (pd.pf_mtag == NULL && 10957 ((pd.pf_mtag = pf_get_mtag(pd.m)) == NULL)) { 10958 action = PF_DROP; 10959 REASON_SET(&reason, PFRES_MEMORY); 10960 pd.act.log = PF_LOG_FORCE; 10961 DPFPRINTF(PF_DEBUG_MISC, 10962 "pf: failed to allocate tag"); 10963 } else { 10964 pd.pf_mtag->flags |= 10965 PF_MTAG_FLAG_FASTFWD_OURS_PRESENT; 10966 pd.m->m_flags &= ~M_FASTFWD_OURS; 10967 } 10968 } 10969 ip_divert_ptr(*m0, dir == PF_IN); 10970 *m0 = NULL; 10971 10972 return (action); 10973 } else { 10974 /* XXX: ipfw has the same behaviour! */ 10975 action = PF_DROP; 10976 REASON_SET(&reason, PFRES_MEMORY); 10977 pd.act.log = PF_LOG_FORCE; 10978 DPFPRINTF(PF_DEBUG_MISC, 10979 "pf: failed to allocate divert tag"); 10980 } 10981 } 10982 /* XXX: Anybody working on it?! 
*/ 10983 if (af == AF_INET6 && r->divert.port) 10984 printf("pf: divert(9) is not supported for IPv6\n"); 10985 10986 /* this flag will need revising if the pkt is forwarded */ 10987 if (pd.pf_mtag) 10988 pd.pf_mtag->flags &= ~PF_MTAG_FLAG_PACKET_LOOPED; 10989 10990 if (pd.act.log) { 10991 struct pf_krule *lr; 10992 struct pf_krule_item *ri; 10993 10994 if (s != NULL && s->nat_rule != NULL && 10995 s->nat_rule->log & PF_LOG_ALL) 10996 lr = s->nat_rule; 10997 else 10998 lr = r; 10999 11000 if (pd.act.log & PF_LOG_FORCE || lr->log & PF_LOG_ALL) 11001 PFLOG_PACKET(action, reason, lr, a, 11002 ruleset, &pd, (s == NULL), NULL); 11003 if (s) { 11004 SLIST_FOREACH(ri, &s->match_rules, entry) 11005 if (ri->r->log & PF_LOG_ALL) 11006 PFLOG_PACKET(action, 11007 reason, ri->r, a, ruleset, &pd, 0, NULL); 11008 } 11009 } 11010 11011 pf_counters_inc(action, &pd, s, r, a); 11012 11013 switch (action) { 11014 case PF_SYNPROXY_DROP: 11015 m_freem(*m0); 11016 case PF_DEFER: 11017 *m0 = NULL; 11018 action = PF_PASS; 11019 break; 11020 case PF_DROP: 11021 m_freem(*m0); 11022 *m0 = NULL; 11023 break; 11024 case PF_AFRT: 11025 if (pf_translate_af(&pd)) { 11026 *m0 = pd.m; 11027 action = PF_DROP; 11028 break; 11029 } 11030 #ifdef INET 11031 if (pd.naf == AF_INET) 11032 pf_route(r, kif->pfik_ifp, s, &pd, inp); 11033 #endif /* INET */ 11034 #ifdef INET6 11035 if (pd.naf == AF_INET6) 11036 pf_route6(r, kif->pfik_ifp, s, &pd, inp); 11037 #endif /* INET6 */ 11038 *m0 = pd.m; 11039 action = PF_PASS; 11040 goto out; 11041 break; 11042 default: 11043 if (pd.act.rt) { 11044 switch (af) { 11045 #ifdef INET 11046 case AF_INET: 11047 /* pf_route() returns unlocked. */ 11048 pf_route(r, kif->pfik_ifp, s, &pd, inp); 11049 break; 11050 #endif /* INET */ 11051 #ifdef INET6 11052 case AF_INET6: 11053 /* pf_route6() returns unlocked. 
*/ 11054 pf_route6(r, kif->pfik_ifp, s, &pd, inp); 11055 break; 11056 #endif /* INET6 */ 11057 } 11058 *m0 = pd.m; 11059 goto out; 11060 } 11061 if (pf_dummynet(&pd, s, r, m0) != 0) { 11062 action = PF_DROP; 11063 REASON_SET(&reason, PFRES_MEMORY); 11064 } 11065 break; 11066 } 11067 11068 eat_pkt: 11069 SDT_PROBE4(pf, ip, test, done, action, reason, r, s); 11070 11071 if (s && action != PF_DROP) { 11072 if (!s->if_index_in && dir == PF_IN) 11073 s->if_index_in = ifp->if_index; 11074 else if (!s->if_index_out && dir == PF_OUT) 11075 s->if_index_out = ifp->if_index; 11076 } 11077 11078 if (s) 11079 PF_STATE_UNLOCK(s); 11080 11081 out: 11082 #ifdef INET6 11083 /* If reassembled packet passed, create new fragments. */ 11084 if (af == AF_INET6 && action == PF_PASS && *m0 && dir == PF_OUT && 11085 (! (pflags & PF_PFIL_NOREFRAGMENT)) && 11086 (mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)) != NULL) 11087 action = pf_refragment6(ifp, m0, mtag, NULL, pflags & PFIL_FWD); 11088 #endif /* INET6 */ 11089 11090 pf_sctp_multihome_delayed(&pd, kif, s, action); 11091 11092 return (action); 11093 } 11094 #endif /* INET || INET6 */ 11095