1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * The FreeBSD IP packet firewall, main file
 */

#include "opt_ipfw.h"
#include "opt_ipdivert.h"
#include "opt_inet.h"
#ifndef INET
#error "IPFIREWALL requires INET"
#endif /* INET */
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
#include <sys/counter.h>
#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/jail.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/ucred.h>
#include <net/ethernet.h> /* for ETHERTYPE_IP */
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/pfil.h>
#include <net/vnet.h>

#include <netpfil/pf/pf_mtag.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_carp.h>
#include <netinet/pim.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/sctp.h>
#include <netinet/sctp_crc32.h>
#include <netinet/sctp_header.h>

#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/in_fib.h>
#ifdef INET6
#include <netinet6/in6_fib.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/scope6_var.h>
#include <netinet6/ip6_var.h>
#endif

#include <net/if_gre.h>		/* for struct grehdr */

#include <netpfil/ipfw/ip_fw_private.h>

#include <machine/in_cksum.h>	/* XXX for in_cksum */

#ifdef MAC
#include <security/mac/mac_framework.h>
#endif

/*
 * static variables followed by global ones.
 * All ipfw global variables are here.
 */

/* Backing store for the net.inet6.ip6.fw.deny_unknown_exthdrs sysctl. */
static VNET_DEFINE(int, fw_deny_unknown_exthdrs);
#define	V_fw_deny_unknown_exthdrs	VNET(fw_deny_unknown_exthdrs)

/* Backing store for net.inet6.ip6.fw.permit_single_frag6; enabled by default. */
static VNET_DEFINE(int, fw_permit_single_frag6) = 1;
#define	V_fw_permit_single_frag6	VNET(fw_permit_single_frag6)

/*
 * Policy of the default rule; the compile-time option
 * IPFIREWALL_DEFAULT_TO_ACCEPT flips the initial value to "accept".
 * Exported read-only (boot-time tunable) as net.inet.ip.fw.default_to_accept.
 */
#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
static int default_to_accept = 1;
#else
static int default_to_accept;
#endif

VNET_DEFINE(int, autoinc_step);
VNET_DEFINE(int, fw_one_pass) = 1;

VNET_DEFINE(unsigned int, fw_tables_max);
VNET_DEFINE(unsigned int, fw_tables_sets) = 0;	/* Don't use set-aware tables */
/* Use 128 tables by default */
static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;

/*
 * Forward declaration of the skipto helper selected at build time.
 * Callers use JUMP() so they do not care which variant was compiled in.
 */
#ifndef LINEAR_SKIPTO
static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
    int tablearg, int jump_backwards);
#define	JUMP(ch, f, num, targ, back)	jump_fast(ch, f, num, targ, back)
#else
static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
    int tablearg, int jump_backwards);
#define	JUMP(ch, f, num, targ, back)	jump_linear(ch, f, num, targ, back)
#endif

/*
 * Each rule belongs to one of 32 different sets (0..31).
 * The variable set_disable contains one bit per set.
 * If the bit is set, all rules in the corresponding set
 * are disabled. Set RESVD_SET(31) is reserved for the default rule
 * and rules that are not deleted by the flush command,
 * and CANNOT be disabled.
 * Rules in set RESVD_SET can only be deleted individually.
 */
VNET_DEFINE(u_int32_t, set_disable);
#define	V_set_disable			VNET(set_disable)

VNET_DEFINE(int, fw_verbose);
/* counter for ipfw_log(NULL...) */
VNET_DEFINE(u_int64_t, norule_counter);
VNET_DEFINE(int, verbose_limit);

/* layer3_chain contains the list of rules for layer 3 */
VNET_DEFINE(struct ip_fw_chain, layer3_chain);

/* ipfw_vnet_ready controls when we are open for business */
VNET_DEFINE(int, ipfw_vnet_ready) = 0;

VNET_DEFINE(int, ipfw_nat_ready) = 0;

/*
 * Hooks for NAT support.  They are not assigned anywhere in this part of
 * the file; presumably the NAT module fills them in when it loads
 * (NOTE(review): confirm against ip_fw_nat.c).
 */
ipfw_nat_t *ipfw_nat_ptr = NULL;
struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
ipfw_nat_cfg_t *ipfw_nat_del_ptr;
ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;

#ifdef SYSCTL_NODE
/* Read-only backing store for the default_rule sysctl declared below. */
uint32_t dummy_def = IPFW_DEFAULT_RULE;
static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS);
static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS);

SYSBEGIN(f3)

/* net.inet.ip.fw.* knobs */
SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
    "Only do a single pass through ipfw when using dummynet(4)");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
    "Rule number auto-increment step");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
    "Log matches to ipfw rules");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
    "Set upper limit of matches of ipfw rules logged");
SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
    &dummy_def, 0,
    "The default/max possible rule number.");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_max,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU",
    "Maximum number of concurrently used tables");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
    0, 0, sysctl_ipfw_tables_sets, "IU",
    "Use per-set namespace for tables");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
    &default_to_accept, 0,
    "Make the default rule accept all packets.");
/* The tables_max tunable seeds default_fw_tables, not the per-vnet value. */
TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables);
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count,
    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
    "Number of static rules");

#ifdef INET6
/* net.inet6.ip6.fw.* knobs */
SYSCTL_DECL(_net_inet6_ip6);
SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
    &VNET_NAME(fw_deny_unknown_exthdrs), 0,
    "Deny packets with unknown IPv6 Extension Headers");
SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6,
    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
    &VNET_NAME(fw_permit_single_frag6), 0,
    "Permit single packet IPv6 fragments");
#endif /* INET6 */

SYSEND

#endif /* SYSCTL_NODE */


/*
 * Some macros used in the various matching options.
233 * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T 234 * Other macros just cast void * into the appropriate type 235 */ 236 #define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) 237 #define TCP(p) ((struct tcphdr *)(p)) 238 #define SCTP(p) ((struct sctphdr *)(p)) 239 #define UDP(p) ((struct udphdr *)(p)) 240 #define ICMP(p) ((struct icmphdr *)(p)) 241 #define ICMP6(p) ((struct icmp6_hdr *)(p)) 242 243 static __inline int 244 icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) 245 { 246 int type = icmp->icmp_type; 247 248 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) ); 249 } 250 251 #define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \ 252 (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) ) 253 254 static int 255 is_icmp_query(struct icmphdr *icmp) 256 { 257 int type = icmp->icmp_type; 258 259 return (type <= ICMP_MAXTYPE && (TT & (1<<type)) ); 260 } 261 #undef TT 262 263 /* 264 * The following checks use two arrays of 8 or 16 bits to store the 265 * bits that we want set or clear, respectively. They are in the 266 * low and high half of cmd->arg1 or cmd->d[0]. 267 * 268 * We scan options and store the bits we find set. We succeed if 269 * 270 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 271 * 272 * The code is sometimes optimized not to store additional variables. 
273 */ 274 275 static int 276 flags_match(ipfw_insn *cmd, u_int8_t bits) 277 { 278 u_char want_clear; 279 bits = ~bits; 280 281 if ( ((cmd->arg1 & 0xff) & bits) != 0) 282 return 0; /* some bits we want set were clear */ 283 want_clear = (cmd->arg1 >> 8) & 0xff; 284 if ( (want_clear & bits) != want_clear) 285 return 0; /* some bits we want clear were set */ 286 return 1; 287 } 288 289 static int 290 ipopts_match(struct ip *ip, ipfw_insn *cmd) 291 { 292 int optlen, bits = 0; 293 u_char *cp = (u_char *)(ip + 1); 294 int x = (ip->ip_hl << 2) - sizeof (struct ip); 295 296 for (; x > 0; x -= optlen, cp += optlen) { 297 int opt = cp[IPOPT_OPTVAL]; 298 299 if (opt == IPOPT_EOL) 300 break; 301 if (opt == IPOPT_NOP) 302 optlen = 1; 303 else { 304 optlen = cp[IPOPT_OLEN]; 305 if (optlen <= 0 || optlen > x) 306 return 0; /* invalid or truncated */ 307 } 308 switch (opt) { 309 310 default: 311 break; 312 313 case IPOPT_LSRR: 314 bits |= IP_FW_IPOPT_LSRR; 315 break; 316 317 case IPOPT_SSRR: 318 bits |= IP_FW_IPOPT_SSRR; 319 break; 320 321 case IPOPT_RR: 322 bits |= IP_FW_IPOPT_RR; 323 break; 324 325 case IPOPT_TS: 326 bits |= IP_FW_IPOPT_TS; 327 break; 328 } 329 } 330 return (flags_match(cmd, bits)); 331 } 332 333 static int 334 tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) 335 { 336 int optlen, bits = 0; 337 u_char *cp = (u_char *)(tcp + 1); 338 int x = (tcp->th_off << 2) - sizeof(struct tcphdr); 339 340 for (; x > 0; x -= optlen, cp += optlen) { 341 int opt = cp[0]; 342 if (opt == TCPOPT_EOL) 343 break; 344 if (opt == TCPOPT_NOP) 345 optlen = 1; 346 else { 347 optlen = cp[1]; 348 if (optlen <= 0) 349 break; 350 } 351 352 switch (opt) { 353 354 default: 355 break; 356 357 case TCPOPT_MAXSEG: 358 bits |= IP_FW_TCPOPT_MSS; 359 break; 360 361 case TCPOPT_WINDOW: 362 bits |= IP_FW_TCPOPT_WINDOW; 363 break; 364 365 case TCPOPT_SACK_PERMITTED: 366 case TCPOPT_SACK: 367 bits |= IP_FW_TCPOPT_SACK; 368 break; 369 370 case TCPOPT_TIMESTAMP: 371 bits |= IP_FW_TCPOPT_TS; 372 break; 373 
374 } 375 } 376 return (flags_match(cmd, bits)); 377 } 378 379 static int 380 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, 381 uint32_t *tablearg) 382 { 383 384 if (ifp == NULL) /* no iface with this packet, match fails */ 385 return (0); 386 387 /* Check by name or by IP address */ 388 if (cmd->name[0] != '\0') { /* match by name */ 389 if (cmd->name[0] == '\1') /* use tablearg to match */ 390 return ipfw_lookup_table(chain, cmd->p.kidx, 0, 391 &ifp->if_index, tablearg); 392 /* Check name */ 393 if (cmd->p.glob) { 394 if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) 395 return(1); 396 } else { 397 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) 398 return(1); 399 } 400 } else { 401 #if !defined(USERSPACE) && defined(__FreeBSD__) /* and OSX too ? */ 402 struct ifaddr *ia; 403 404 if_addr_rlock(ifp); 405 TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { 406 if (ia->ifa_addr->sa_family != AF_INET) 407 continue; 408 if (cmd->p.ip.s_addr == ((struct sockaddr_in *) 409 (ia->ifa_addr))->sin_addr.s_addr) { 410 if_addr_runlock(ifp); 411 return(1); /* match */ 412 } 413 } 414 if_addr_runlock(ifp); 415 #endif /* __FreeBSD__ */ 416 } 417 return(0); /* no match, fail ... */ 418 } 419 420 /* 421 * The verify_path function checks if a route to the src exists and 422 * if it is reachable via ifp (when provided). 423 * 424 * The 'verrevpath' option checks that the interface that an IP packet 425 * arrives on is the same interface that traffic destined for the 426 * packet's source address would be routed out of. 427 * The 'versrcreach' option just checks that the source address is 428 * reachable via any route (except default) in the routing table. 429 * These two are a measure to block forged packets. This is also 430 * commonly known as "anti-spoofing" or Unicast Reverse Path 431 * Forwarding (Unicast RFP) in Cisco-ese. 
The name of the knobs 432 * is purposely reminiscent of the Cisco IOS command, 433 * 434 * ip verify unicast reverse-path 435 * ip verify unicast source reachable-via any 436 * 437 * which implements the same functionality. But note that the syntax 438 * is misleading, and the check may be performed on all IP packets 439 * whether unicast, multicast, or broadcast. 440 */ 441 static int 442 verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) 443 { 444 #if defined(USERSPACE) || !defined(__FreeBSD__) 445 return 0; 446 #else 447 struct nhop4_basic nh4; 448 449 if (fib4_lookup_nh_basic(fib, src, NHR_IFAIF, 0, &nh4) != 0) 450 return (0); 451 452 /* 453 * If ifp is provided, check for equality with rtentry. 454 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, 455 * in order to pass packets injected back by if_simloop(): 456 * routing entry (via lo0) for our own address 457 * may exist, so we need to handle routing assymetry. 458 */ 459 if (ifp != NULL && ifp != nh4.nh_ifp) 460 return (0); 461 462 /* if no ifp provided, check if rtentry is not default route */ 463 if (ifp == NULL && (nh4.nh_flags & NHF_DEFAULT) != 0) 464 return (0); 465 466 /* or if this is a blackhole/reject route */ 467 if (ifp == NULL && (nh4.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0) 468 return (0); 469 470 /* found valid route */ 471 return 1; 472 #endif /* __FreeBSD__ */ 473 } 474 475 /* 476 * Generate an SCTP packet containing an ABORT chunk. The verification tag 477 * is given by vtag. The T-bit is set in the ABORT chunk if and only if 478 * reflected is not 0. 
479 */ 480 481 static struct mbuf * 482 ipfw_send_abort(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t vtag, 483 int reflected) 484 { 485 struct mbuf *m; 486 struct ip *ip; 487 #ifdef INET6 488 struct ip6_hdr *ip6; 489 #endif 490 struct sctphdr *sctp; 491 struct sctp_chunkhdr *chunk; 492 u_int16_t hlen, plen, tlen; 493 494 MGETHDR(m, M_NOWAIT, MT_DATA); 495 if (m == NULL) 496 return (NULL); 497 498 M_SETFIB(m, id->fib); 499 #ifdef MAC 500 if (replyto != NULL) 501 mac_netinet_firewall_reply(replyto, m); 502 else 503 mac_netinet_firewall_send(m); 504 #else 505 (void)replyto; /* don't warn about unused arg */ 506 #endif 507 508 switch (id->addr_type) { 509 case 4: 510 hlen = sizeof(struct ip); 511 break; 512 #ifdef INET6 513 case 6: 514 hlen = sizeof(struct ip6_hdr); 515 break; 516 #endif 517 default: 518 /* XXX: log me?!? */ 519 FREE_PKT(m); 520 return (NULL); 521 } 522 plen = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr); 523 tlen = hlen + plen; 524 m->m_data += max_linkhdr; 525 m->m_flags |= M_SKIP_FIREWALL; 526 m->m_pkthdr.len = m->m_len = tlen; 527 m->m_pkthdr.rcvif = NULL; 528 bzero(m->m_data, tlen); 529 530 switch (id->addr_type) { 531 case 4: 532 ip = mtod(m, struct ip *); 533 534 ip->ip_v = 4; 535 ip->ip_hl = sizeof(struct ip) >> 2; 536 ip->ip_tos = IPTOS_LOWDELAY; 537 ip->ip_len = htons(tlen); 538 ip->ip_id = htons(0); 539 ip->ip_off = htons(0); 540 ip->ip_ttl = V_ip_defttl; 541 ip->ip_p = IPPROTO_SCTP; 542 ip->ip_sum = 0; 543 ip->ip_src.s_addr = htonl(id->dst_ip); 544 ip->ip_dst.s_addr = htonl(id->src_ip); 545 546 sctp = (struct sctphdr *)(ip + 1); 547 break; 548 #ifdef INET6 549 case 6: 550 ip6 = mtod(m, struct ip6_hdr *); 551 552 ip6->ip6_vfc = IPV6_VERSION; 553 ip6->ip6_plen = htons(plen); 554 ip6->ip6_nxt = IPPROTO_SCTP; 555 ip6->ip6_hlim = IPV6_DEFHLIM; 556 ip6->ip6_src = id->dst_ip6; 557 ip6->ip6_dst = id->src_ip6; 558 559 sctp = (struct sctphdr *)(ip6 + 1); 560 break; 561 #endif 562 } 563 564 sctp->src_port = htons(id->dst_port); 
565 sctp->dest_port = htons(id->src_port); 566 sctp->v_tag = htonl(vtag); 567 sctp->checksum = htonl(0); 568 569 chunk = (struct sctp_chunkhdr *)(sctp + 1); 570 chunk->chunk_type = SCTP_ABORT_ASSOCIATION; 571 chunk->chunk_flags = 0; 572 if (reflected != 0) { 573 chunk->chunk_flags |= SCTP_HAD_NO_TCB; 574 } 575 chunk->chunk_length = htons(sizeof(struct sctp_chunkhdr)); 576 577 sctp->checksum = sctp_calculate_cksum(m, hlen); 578 579 return (m); 580 } 581 582 /* 583 * Generate a TCP packet, containing either a RST or a keepalive. 584 * When flags & TH_RST, we are sending a RST packet, because of a 585 * "reset" action matched the packet. 586 * Otherwise we are sending a keepalive, and flags & TH_ 587 * The 'replyto' mbuf is the mbuf being replied to, if any, and is required 588 * so that MAC can label the reply appropriately. 589 */ 590 struct mbuf * 591 ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, 592 u_int32_t ack, int flags) 593 { 594 struct mbuf *m = NULL; /* stupid compiler */ 595 struct ip *h = NULL; /* stupid compiler */ 596 #ifdef INET6 597 struct ip6_hdr *h6 = NULL; 598 #endif 599 struct tcphdr *th = NULL; 600 int len, dir; 601 602 MGETHDR(m, M_NOWAIT, MT_DATA); 603 if (m == NULL) 604 return (NULL); 605 606 M_SETFIB(m, id->fib); 607 #ifdef MAC 608 if (replyto != NULL) 609 mac_netinet_firewall_reply(replyto, m); 610 else 611 mac_netinet_firewall_send(m); 612 #else 613 (void)replyto; /* don't warn about unused arg */ 614 #endif 615 616 switch (id->addr_type) { 617 case 4: 618 len = sizeof(struct ip) + sizeof(struct tcphdr); 619 break; 620 #ifdef INET6 621 case 6: 622 len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 623 break; 624 #endif 625 default: 626 /* XXX: log me?!? 
*/ 627 FREE_PKT(m); 628 return (NULL); 629 } 630 dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); 631 632 m->m_data += max_linkhdr; 633 m->m_flags |= M_SKIP_FIREWALL; 634 m->m_pkthdr.len = m->m_len = len; 635 m->m_pkthdr.rcvif = NULL; 636 bzero(m->m_data, len); 637 638 switch (id->addr_type) { 639 case 4: 640 h = mtod(m, struct ip *); 641 642 /* prepare for checksum */ 643 h->ip_p = IPPROTO_TCP; 644 h->ip_len = htons(sizeof(struct tcphdr)); 645 if (dir) { 646 h->ip_src.s_addr = htonl(id->src_ip); 647 h->ip_dst.s_addr = htonl(id->dst_ip); 648 } else { 649 h->ip_src.s_addr = htonl(id->dst_ip); 650 h->ip_dst.s_addr = htonl(id->src_ip); 651 } 652 653 th = (struct tcphdr *)(h + 1); 654 break; 655 #ifdef INET6 656 case 6: 657 h6 = mtod(m, struct ip6_hdr *); 658 659 /* prepare for checksum */ 660 h6->ip6_nxt = IPPROTO_TCP; 661 h6->ip6_plen = htons(sizeof(struct tcphdr)); 662 if (dir) { 663 h6->ip6_src = id->src_ip6; 664 h6->ip6_dst = id->dst_ip6; 665 } else { 666 h6->ip6_src = id->dst_ip6; 667 h6->ip6_dst = id->src_ip6; 668 } 669 670 th = (struct tcphdr *)(h6 + 1); 671 break; 672 #endif 673 } 674 675 if (dir) { 676 th->th_sport = htons(id->src_port); 677 th->th_dport = htons(id->dst_port); 678 } else { 679 th->th_sport = htons(id->dst_port); 680 th->th_dport = htons(id->src_port); 681 } 682 th->th_off = sizeof(struct tcphdr) >> 2; 683 684 if (flags & TH_RST) { 685 if (flags & TH_ACK) { 686 th->th_seq = htonl(ack); 687 th->th_flags = TH_RST; 688 } else { 689 if (flags & TH_SYN) 690 seq++; 691 th->th_ack = htonl(seq); 692 th->th_flags = TH_RST | TH_ACK; 693 } 694 } else { 695 /* 696 * Keepalive - use caller provided sequence numbers 697 */ 698 th->th_seq = htonl(seq); 699 th->th_ack = htonl(ack); 700 th->th_flags = TH_ACK; 701 } 702 703 switch (id->addr_type) { 704 case 4: 705 th->th_sum = in_cksum(m, len); 706 707 /* finish the ip header */ 708 h->ip_v = 4; 709 h->ip_hl = sizeof(*h) >> 2; 710 h->ip_tos = IPTOS_LOWDELAY; 711 h->ip_off = htons(0); 712 h->ip_len = htons(len); 
713 h->ip_ttl = V_ip_defttl; 714 h->ip_sum = 0; 715 break; 716 #ifdef INET6 717 case 6: 718 th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), 719 sizeof(struct tcphdr)); 720 721 /* finish the ip6 header */ 722 h6->ip6_vfc |= IPV6_VERSION; 723 h6->ip6_hlim = IPV6_DEFHLIM; 724 break; 725 #endif 726 } 727 728 return (m); 729 } 730 731 #ifdef INET6 732 /* 733 * ipv6 specific rules here... 734 */ 735 static __inline int 736 icmp6type_match (int type, ipfw_insn_u32 *cmd) 737 { 738 return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) ); 739 } 740 741 static int 742 flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) 743 { 744 int i; 745 for (i=0; i <= cmd->o.arg1; ++i ) 746 if (curr_flow == cmd->d[i] ) 747 return 1; 748 return 0; 749 } 750 751 /* support for IP6_*_ME opcodes */ 752 static const struct in6_addr lla_mask = {{{ 753 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 754 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 755 }}}; 756 757 static int 758 ipfw_localip6(struct in6_addr *in6) 759 { 760 struct rm_priotracker in6_ifa_tracker; 761 struct in6_ifaddr *ia; 762 763 if (IN6_IS_ADDR_MULTICAST(in6)) 764 return (0); 765 766 if (!IN6_IS_ADDR_LINKLOCAL(in6)) 767 return (in6_localip(in6)); 768 769 IN6_IFADDR_RLOCK(&in6_ifa_tracker); 770 TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 771 if (!IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) 772 continue; 773 if (IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr, 774 in6, &lla_mask)) { 775 IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); 776 return (1); 777 } 778 } 779 IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); 780 return (0); 781 } 782 783 static int 784 verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) 785 { 786 struct nhop6_basic nh6; 787 788 if (IN6_IS_SCOPE_LINKLOCAL(src)) 789 return (1); 790 791 if (fib6_lookup_nh_basic(fib, src, 0, NHR_IFAIF, 0, &nh6) != 0) 792 return (0); 793 794 /* If ifp is provided, check for equality with route table. 
*/ 795 if (ifp != NULL && ifp != nh6.nh_ifp) 796 return (0); 797 798 /* if no ifp provided, check if rtentry is not default route */ 799 if (ifp == NULL && (nh6.nh_flags & NHF_DEFAULT) != 0) 800 return (0); 801 802 /* or if this is a blackhole/reject route */ 803 if (ifp == NULL && (nh6.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0) 804 return (0); 805 806 /* found valid route */ 807 return 1; 808 } 809 810 static int 811 is_icmp6_query(int icmp6_type) 812 { 813 if ((icmp6_type <= ICMP6_MAXTYPE) && 814 (icmp6_type == ICMP6_ECHO_REQUEST || 815 icmp6_type == ICMP6_MEMBERSHIP_QUERY || 816 icmp6_type == ICMP6_WRUREQUEST || 817 icmp6_type == ICMP6_FQDN_QUERY || 818 icmp6_type == ICMP6_NI_QUERY)) 819 return (1); 820 821 return (0); 822 } 823 824 static void 825 send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) 826 { 827 struct mbuf *m; 828 829 m = args->m; 830 if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) { 831 struct tcphdr *tcp; 832 tcp = (struct tcphdr *)((char *)ip6 + hlen); 833 834 if ((tcp->th_flags & TH_RST) == 0) { 835 struct mbuf *m0; 836 m0 = ipfw_send_pkt(args->m, &(args->f_id), 837 ntohl(tcp->th_seq), ntohl(tcp->th_ack), 838 tcp->th_flags | TH_RST); 839 if (m0 != NULL) 840 ip6_output(m0, NULL, NULL, 0, NULL, NULL, 841 NULL); 842 } 843 FREE_PKT(m); 844 } else if (code == ICMP6_UNREACH_ABORT && 845 args->f_id.proto == IPPROTO_SCTP) { 846 struct mbuf *m0; 847 struct sctphdr *sctp; 848 u_int32_t v_tag; 849 int reflected; 850 851 sctp = (struct sctphdr *)((char *)ip6 + hlen); 852 reflected = 1; 853 v_tag = ntohl(sctp->v_tag); 854 /* Investigate the first chunk header if available */ 855 if (m->m_len >= hlen + sizeof(struct sctphdr) + 856 sizeof(struct sctp_chunkhdr)) { 857 struct sctp_chunkhdr *chunk; 858 859 chunk = (struct sctp_chunkhdr *)(sctp + 1); 860 switch (chunk->chunk_type) { 861 case SCTP_INITIATION: 862 /* 863 * Packets containing an INIT chunk MUST have 864 * a zero v-tag. 
865 */ 866 if (v_tag != 0) { 867 v_tag = 0; 868 break; 869 } 870 /* INIT chunk MUST NOT be bundled */ 871 if (m->m_pkthdr.len > 872 hlen + sizeof(struct sctphdr) + 873 ntohs(chunk->chunk_length) + 3) { 874 break; 875 } 876 /* Use the initiate tag if available */ 877 if ((m->m_len >= hlen + sizeof(struct sctphdr) + 878 sizeof(struct sctp_chunkhdr) + 879 offsetof(struct sctp_init, a_rwnd))) { 880 struct sctp_init *init; 881 882 init = (struct sctp_init *)(chunk + 1); 883 v_tag = ntohl(init->initiate_tag); 884 reflected = 0; 885 } 886 break; 887 case SCTP_ABORT_ASSOCIATION: 888 /* 889 * If the packet contains an ABORT chunk, don't 890 * reply. 891 * XXX: We should search through all chunks, 892 * but don't do to avoid attacks. 893 */ 894 v_tag = 0; 895 break; 896 } 897 } 898 if (v_tag == 0) { 899 m0 = NULL; 900 } else { 901 m0 = ipfw_send_abort(args->m, &(args->f_id), v_tag, 902 reflected); 903 } 904 if (m0 != NULL) 905 ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); 906 FREE_PKT(m); 907 } else if (code != ICMP6_UNREACH_RST && code != ICMP6_UNREACH_ABORT) { 908 /* Send an ICMPv6 unreach. */ 909 #if 0 910 /* 911 * Unlike above, the mbufs need to line up with the ip6 hdr, 912 * as the contents are read. We need to m_adj() the 913 * needed amount. 914 * The mbuf will however be thrown away so we can adjust it. 915 * Remember we did an m_pullup on it already so we 916 * can make some assumptions about contiguousness. 917 */ 918 if (args->L3offset) 919 m_adj(m, args->L3offset); 920 #endif 921 icmp6_error(m, ICMP6_DST_UNREACH, code, 0); 922 } else 923 FREE_PKT(m); 924 925 args->m = NULL; 926 } 927 928 #endif /* INET6 */ 929 930 931 /* 932 * sends a reject message, consuming the mbuf passed as an argument. 
933 */ 934 static void 935 send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip) 936 { 937 938 #if 0 939 /* XXX When ip is not guaranteed to be at mtod() we will 940 * need to account for this */ 941 * The mbuf will however be thrown away so we can adjust it. 942 * Remember we did an m_pullup on it already so we 943 * can make some assumptions about contiguousness. 944 */ 945 if (args->L3offset) 946 m_adj(m, args->L3offset); 947 #endif 948 if (code != ICMP_REJECT_RST && code != ICMP_REJECT_ABORT) { 949 /* Send an ICMP unreach */ 950 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 951 } else if (code == ICMP_REJECT_RST && args->f_id.proto == IPPROTO_TCP) { 952 struct tcphdr *const tcp = 953 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 954 if ( (tcp->th_flags & TH_RST) == 0) { 955 struct mbuf *m; 956 m = ipfw_send_pkt(args->m, &(args->f_id), 957 ntohl(tcp->th_seq), ntohl(tcp->th_ack), 958 tcp->th_flags | TH_RST); 959 if (m != NULL) 960 ip_output(m, NULL, NULL, 0, NULL, NULL); 961 } 962 FREE_PKT(args->m); 963 } else if (code == ICMP_REJECT_ABORT && 964 args->f_id.proto == IPPROTO_SCTP) { 965 struct mbuf *m; 966 struct sctphdr *sctp; 967 struct sctp_chunkhdr *chunk; 968 struct sctp_init *init; 969 u_int32_t v_tag; 970 int reflected; 971 972 sctp = L3HDR(struct sctphdr, mtod(args->m, struct ip *)); 973 reflected = 1; 974 v_tag = ntohl(sctp->v_tag); 975 if (iplen >= (ip->ip_hl << 2) + sizeof(struct sctphdr) + 976 sizeof(struct sctp_chunkhdr)) { 977 /* Look at the first chunk header if available */ 978 chunk = (struct sctp_chunkhdr *)(sctp + 1); 979 switch (chunk->chunk_type) { 980 case SCTP_INITIATION: 981 /* 982 * Packets containing an INIT chunk MUST have 983 * a zero v-tag. 
984 */ 985 if (v_tag != 0) { 986 v_tag = 0; 987 break; 988 } 989 /* INIT chunk MUST NOT be bundled */ 990 if (iplen > 991 (ip->ip_hl << 2) + sizeof(struct sctphdr) + 992 ntohs(chunk->chunk_length) + 3) { 993 break; 994 } 995 /* Use the initiate tag if available */ 996 if ((iplen >= (ip->ip_hl << 2) + 997 sizeof(struct sctphdr) + 998 sizeof(struct sctp_chunkhdr) + 999 offsetof(struct sctp_init, a_rwnd))) { 1000 init = (struct sctp_init *)(chunk + 1); 1001 v_tag = ntohl(init->initiate_tag); 1002 reflected = 0; 1003 } 1004 break; 1005 case SCTP_ABORT_ASSOCIATION: 1006 /* 1007 * If the packet contains an ABORT chunk, don't 1008 * reply. 1009 * XXX: We should search through all chunks, 1010 * but don't do to avoid attacks. 1011 */ 1012 v_tag = 0; 1013 break; 1014 } 1015 } 1016 if (v_tag == 0) { 1017 m = NULL; 1018 } else { 1019 m = ipfw_send_abort(args->m, &(args->f_id), v_tag, 1020 reflected); 1021 } 1022 if (m != NULL) 1023 ip_output(m, NULL, NULL, 0, NULL, NULL); 1024 FREE_PKT(args->m); 1025 } else 1026 FREE_PKT(args->m); 1027 args->m = NULL; 1028 } 1029 1030 /* 1031 * Support for uid/gid/jail lookup. These tests are expensive 1032 * (because we may need to look into the list of active sockets) 1033 * so we cache the results. ugid_lookupp is 0 if we have not 1034 * yet done a lookup, 1 if we succeeded, and -1 if we tried 1035 * and failed. The function always returns the match value. 1036 * We could actually spare the variable and use *uc, setting 1037 * it to '(void *)check_uidgid if we have no info, NULL if 1038 * we tried and failed, or any other value if successful. 
 */
static int
check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp,
    struct ucred **uc)
{
#if defined(USERSPACE)
	return 0;	// not supported in userspace
#else
#ifndef __FreeBSD__
	/* XXX */
	return cred_check(insn, proto, oif,
	    dst_ip, dst_port, src_ip, src_port,
	    (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
#else  /* FreeBSD */
	struct in_addr src_ip, dst_ip;
	struct inpcbinfo *pi;
	struct ipfw_flow_id *id;
	struct inpcb *pcb, *inp;
	struct ifnet *oif;
	int lookupflags;
	int match;

	id = &args->f_id;
	inp = args->inp;
	oif = args->oif;

	/*
	 * Check to see if the UDP or TCP stack supplied us with
	 * the PCB.  If so, rather than holding a lock and looking
	 * up the PCB, we can use the one that was supplied.
	 */
	if (inp && *ugid_lookupp == 0) {
		INP_LOCK_ASSERT(inp);
		if (inp->inp_socket != NULL) {
			/* Take a reference; it is stored in *uc for the caller. */
			*uc = crhold(inp->inp_cred);
			*ugid_lookupp = 1;
		} else
			*ugid_lookupp = -1;
	}
	/*
	 * If we have already been here and the packet has no
	 * PCB entry associated with it, then we can safely
	 * assume that this is a no match.
	 */
	if (*ugid_lookupp == -1)
		return (0);
	/* Only TCP and UDP have a PCB table to search. */
	if (id->proto == IPPROTO_TCP) {
		lookupflags = 0;
		pi = &V_tcbinfo;
	} else if (id->proto == IPPROTO_UDP) {
		lookupflags = INPLOOKUP_WILDCARD;
		pi = &V_udbinfo;
	} else
		return 0;
	/* Ask the lookup to return the PCB read-locked. */
	lookupflags |= INPLOOKUP_RLOCKPCB;
	match = 0;
	if (*ugid_lookupp == 0) {
		/*
		 * No cached credential yet: look the PCB up.  For incoming
		 * packets (oif == NULL) the flow is passed as-is; for
		 * outgoing ones source and destination are swapped.
		 */
		if (id->addr_type == 6) {
#ifdef INET6
			if (oif == NULL)
				pcb = in6_pcblookup_mbuf(pi,
				    &id->src_ip6, htons(id->src_port),
				    &id->dst_ip6, htons(id->dst_port),
				    lookupflags, oif, args->m);
			else
				pcb = in6_pcblookup_mbuf(pi,
				    &id->dst_ip6, htons(id->dst_port),
				    &id->src_ip6, htons(id->src_port),
				    lookupflags, oif, args->m);
#else
			*ugid_lookupp = -1;
			return (0);
#endif
		} else {
			src_ip.s_addr = htonl(id->src_ip);
			dst_ip.s_addr = htonl(id->dst_ip);
			if (oif == NULL)
				pcb = in_pcblookup_mbuf(pi,
				    src_ip, htons(id->src_port),
				    dst_ip, htons(id->dst_port),
				    lookupflags, oif, args->m);
			else
				pcb = in_pcblookup_mbuf(pi,
				    dst_ip, htons(id->dst_port),
				    src_ip, htons(id->src_port),
				    lookupflags, oif, args->m);
		}
		if (pcb != NULL) {
			INP_RLOCK_ASSERT(pcb);
			/* Hold the credential before dropping the PCB lock. */
			*uc = crhold(pcb->inp_cred);
			*ugid_lookupp = 1;
			INP_RUNLOCK(pcb);
		}
		if (*ugid_lookupp == 0) {
			/*
			 * We tried and failed, set the variable to -1
			 * so we will not try again on this packet.
			 */
			*ugid_lookupp = -1;
			return (0);
		}
	}
	/* *uc is valid here; compare it as the opcode requests. */
	if (insn->o.opcode == O_UID)
		match = ((*uc)->cr_uid == (uid_t)insn->d[0]);
	else if (insn->o.opcode == O_GID)
		match = groupmember((gid_t)insn->d[0], *uc);
	else if (insn->o.opcode == O_JAIL)
		match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
	return (match);
#endif /* __FreeBSD__ */
#endif /* not supported in userspace */
}

/*
 * Helper function to set args with info on the rule after the matching
 * one. slot is precise, whereas we guess rule_id as they are
 * assigned sequentially.
 */
static inline void
set_match(struct ip_fw_args *args, int slot,
    struct ip_fw_chain *chain)
{
	args->rule.chain_id = chain->id;
	args->rule.slot = slot + 1; /* we use 0 as a marker */
	args->rule.rule_id = 1 + chain->map[slot]->id;
	args->rule.rulenum = chain->map[slot]->rulenum;
}

#ifndef LINEAR_SKIPTO
/*
 * Helper function to enable cached rule lookups using
 * cached_id and cached_pos fields in ipfw rule.
 *
 * Returns the position in the chain of the rule to jump to from rule f.
 * num is the jump target, or IP_FW_TARG to take it from tablearg;
 * tablearg is not named in the body and is presumably consumed by the
 * IP_FW_ARG_TABLEARG() macro (NOTE(review): confirm in ip_fw_private.h).
 * Unless jump_backwards is non-zero, targets at or before f->rulenum are
 * replaced by f->rulenum + 1.
 */
static int
jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
    int tablearg, int jump_backwards)
{
	int f_pos;

	/* If possible use cached f_pos (in f->cached_pos),
	 * whose version is written in f->cached_id
	 * (horrible hacks to avoid changing the ABI).
	 */
	if (num != IP_FW_TARG && f->cached_id == chain->id)
		f_pos = f->cached_pos;
	else {
		int i = IP_FW_ARG_TABLEARG(chain, num, skipto);
		/* make sure we do not jump backward */
		if (jump_backwards == 0 && i <= f->rulenum)
			i = f->rulenum + 1;
		/* Use the index map when there is one, else search. */
		if (chain->idxmap != NULL)
			f_pos = chain->idxmap[i];
		else
			f_pos = ipfw_find_rule(chain, i, 0);
		/* update the cache */
		/* (tablearg targets vary per packet, so they are not cached) */
		if (num != IP_FW_TARG) {
			f->cached_id = chain->id;
			f->cached_pos = f_pos;
		}
	}

	return (f_pos);
}
#else
/*
 * Helper function to enable real fast rule lookups.
 *
 * Same contract as jump_fast(), but the position is always read from
 * chain->idxmap and nothing is cached in the rule.
 */
static int
jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
    int tablearg, int jump_backwards)
{
	int f_pos;

	num = IP_FW_ARG_TABLEARG(chain, num, skipto);
	/* make sure we do not jump backward */
	if (jump_backwards == 0 && num <= f->rulenum)
		num = f->rulenum + 1;
	f_pos = chain->idxmap[num];

	return (f_pos);
}
#endif

/* Shorthand that relies on a variable named 'chain' at the point of use. */
#define	TARG(k, f)	IP_FW_ARG_TABLEARG(chain, k, f)
/*
 * The main check routine for the firewall.
 *
 * All arguments are in args so we can modify them and return them
 * back to the caller.
 *
 * Parameters:
 *
 *	args->m	(in/out) The packet; we set to NULL when/if we nuke it.
 *		Starts with the IP header.
 *	args->eh (in)	Mac header if present, NULL for layer3 packet.
 *	args->L3offset	Number of bytes bypassed if we came from L2.
 *			e.g. often sizeof(eh)  ** NOTYET **
 *	args->oif	Outgoing interface, NULL if packet is incoming.
 *		The incoming interface is in the mbuf. (in)
 *	args->divert_rule (in/out)
 *		Skip up to the first rule past this rule number;
 *		upon return, non-zero port number for divert or tee.
 *
 *	args->rule	Pointer to the last matching rule (in/out)
 *	args->next_hop	Socket we are forwarding to (out).
1244 * args->next_hop6 IPv6 next hop we are forwarding to (out). 1245 * args->f_id Addresses grabbed from the packet (out) 1246 * args->rule.info a cookie depending on rule action 1247 * 1248 * Return value: 1249 * 1250 * IP_FW_PASS the packet must be accepted 1251 * IP_FW_DENY the packet must be dropped 1252 * IP_FW_DIVERT divert packet, port in m_tag 1253 * IP_FW_TEE tee packet, port in m_tag 1254 * IP_FW_DUMMYNET to dummynet, pipe in args->cookie 1255 * IP_FW_NETGRAPH into netgraph, cookie args->cookie 1256 * args->rule contains the matching rule, 1257 * args->rule.info has additional information. 1258 * 1259 */ 1260 int 1261 ipfw_chk(struct ip_fw_args *args) 1262 { 1263 1264 /* 1265 * Local variables holding state while processing a packet: 1266 * 1267 * IMPORTANT NOTE: to speed up the processing of rules, there 1268 * are some assumption on the values of the variables, which 1269 * are documented here. Should you change them, please check 1270 * the implementation of the various instructions to make sure 1271 * that they still work. 1272 * 1273 * args->eh The MAC header. It is non-null for a layer2 1274 * packet, it is NULL for a layer-3 packet. 1275 * **notyet** 1276 * args->L3offset Offset in the packet to the L3 (IP or equiv.) header. 1277 * 1278 * m | args->m Pointer to the mbuf, as received from the caller. 1279 * It may change if ipfw_chk() does an m_pullup, or if it 1280 * consumes the packet because it calls send_reject(). 1281 * XXX This has to change, so that ipfw_chk() never modifies 1282 * or consumes the buffer. 1283 * ip is the beginning of the ip(4 or 6) header. 1284 * Calculated by adding the L3offset to the start of data. 1285 * (Until we start using L3offset, the packet is 1286 * supposed to start with the ip header). 
1287 */ 1288 struct mbuf *m = args->m; 1289 struct ip *ip = mtod(m, struct ip *); 1290 1291 /* 1292 * For rules which contain uid/gid or jail constraints, cache 1293 * a copy of the users credentials after the pcb lookup has been 1294 * executed. This will speed up the processing of rules with 1295 * these types of constraints, as well as decrease contention 1296 * on pcb related locks. 1297 */ 1298 #ifndef __FreeBSD__ 1299 struct bsd_ucred ucred_cache; 1300 #else 1301 struct ucred *ucred_cache = NULL; 1302 #endif 1303 int ucred_lookup = 0; 1304 1305 /* 1306 * oif | args->oif If NULL, ipfw_chk has been called on the 1307 * inbound path (ether_input, ip_input). 1308 * If non-NULL, ipfw_chk has been called on the outbound path 1309 * (ether_output, ip_output). 1310 */ 1311 struct ifnet *oif = args->oif; 1312 1313 int f_pos = 0; /* index of current rule in the array */ 1314 int retval = 0; 1315 1316 /* 1317 * hlen The length of the IP header. 1318 */ 1319 u_int hlen = 0; /* hlen >0 means we have an IP pkt */ 1320 1321 /* 1322 * offset The offset of a fragment. offset != 0 means that 1323 * we have a fragment at this offset of an IPv4 packet. 1324 * offset == 0 means that (if this is an IPv4 packet) 1325 * this is the first or only fragment. 1326 * For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header 1327 * or there is a single packet fragment (fragment header added 1328 * without needed). We will treat a single packet fragment as if 1329 * there was no fragment header (or log/block depending on the 1330 * V_fw_permit_single_frag6 sysctl setting). 1331 */ 1332 u_short offset = 0; 1333 u_short ip6f_mf = 0; 1334 1335 /* 1336 * Local copies of addresses. They are only valid if we have 1337 * an IP packet. 1338 * 1339 * proto The protocol. Set to 0 for non-ip packets, 1340 * or to the protocol read from the packet otherwise. 1341 * proto != 0 means that we have an IPv4 packet. 1342 * 1343 * src_port, dst_port port numbers, in HOST format. 
Only 1344 * valid for TCP and UDP packets. 1345 * 1346 * src_ip, dst_ip ip addresses, in NETWORK format. 1347 * Only valid for IPv4 packets. 1348 */ 1349 uint8_t proto; 1350 uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */ 1351 struct in_addr src_ip, dst_ip; /* NOTE: network format */ 1352 int iplen = 0; 1353 int pktlen; 1354 uint16_t etype = 0; /* Host order stored ether type */ 1355 1356 /* 1357 * dyn_dir = MATCH_UNKNOWN when rules unchecked, 1358 * MATCH_NONE when checked and not matched (q = NULL), 1359 * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) 1360 */ 1361 int dyn_dir = MATCH_UNKNOWN; 1362 uint16_t dyn_name = 0; 1363 struct ip_fw *q = NULL; 1364 struct ip_fw_chain *chain = &V_layer3_chain; 1365 1366 /* 1367 * We store in ulp a pointer to the upper layer protocol header. 1368 * In the ipv4 case this is easy to determine from the header, 1369 * but for ipv6 we might have some additional headers in the middle. 1370 * ulp is NULL if not found. 1371 */ 1372 void *ulp = NULL; /* upper layer protocol pointer. */ 1373 1374 /* XXX ipv6 variables */ 1375 int is_ipv6 = 0; 1376 uint8_t icmp6_type = 0; 1377 uint16_t ext_hd = 0; /* bits vector for extension header filtering */ 1378 /* end of ipv6 variables */ 1379 1380 int is_ipv4 = 0; 1381 1382 int done = 0; /* flag to exit the outer loop */ 1383 1384 if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) 1385 return (IP_FW_PASS); /* accept */ 1386 1387 dst_ip.s_addr = 0; /* make sure it is initialized */ 1388 src_ip.s_addr = 0; /* make sure it is initialized */ 1389 pktlen = m->m_pkthdr.len; 1390 args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */ 1391 proto = args->f_id.proto = 0; /* mark f_id invalid */ 1392 /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */ 1393 1394 /* 1395 * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, 1396 * then it sets p to point at the offset "len" in the mbuf. 
WARNING: the 1397 * pointer might become stale after other pullups (but we never use it 1398 * this way). 1399 */ 1400 #define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T)) 1401 #define PULLUP_LEN(_len, p, T) \ 1402 do { \ 1403 int x = (_len) + T; \ 1404 if ((m)->m_len < x) { \ 1405 args->m = m = m_pullup(m, x); \ 1406 if (m == NULL) \ 1407 goto pullup_failed; \ 1408 } \ 1409 p = (mtod(m, char *) + (_len)); \ 1410 } while (0) 1411 1412 /* 1413 * if we have an ether header, 1414 */ 1415 if (args->eh) 1416 etype = ntohs(args->eh->ether_type); 1417 1418 /* Identify IP packets and fill up variables. */ 1419 if (pktlen >= sizeof(struct ip6_hdr) && 1420 (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) { 1421 struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; 1422 is_ipv6 = 1; 1423 args->f_id.addr_type = 6; 1424 hlen = sizeof(struct ip6_hdr); 1425 proto = ip6->ip6_nxt; 1426 1427 /* Search extension headers to find upper layer protocols */ 1428 while (ulp == NULL && offset == 0) { 1429 switch (proto) { 1430 case IPPROTO_ICMPV6: 1431 PULLUP_TO(hlen, ulp, struct icmp6_hdr); 1432 icmp6_type = ICMP6(ulp)->icmp6_type; 1433 break; 1434 1435 case IPPROTO_TCP: 1436 PULLUP_TO(hlen, ulp, struct tcphdr); 1437 dst_port = TCP(ulp)->th_dport; 1438 src_port = TCP(ulp)->th_sport; 1439 /* save flags for dynamic rules */ 1440 args->f_id._flags = TCP(ulp)->th_flags; 1441 break; 1442 1443 case IPPROTO_SCTP: 1444 if (pktlen >= hlen + sizeof(struct sctphdr) + 1445 sizeof(struct sctp_chunkhdr) + 1446 offsetof(struct sctp_init, a_rwnd)) 1447 PULLUP_LEN(hlen, ulp, 1448 sizeof(struct sctphdr) + 1449 sizeof(struct sctp_chunkhdr) + 1450 offsetof(struct sctp_init, a_rwnd)); 1451 else if (pktlen >= hlen + sizeof(struct sctphdr)) 1452 PULLUP_LEN(hlen, ulp, pktlen - hlen); 1453 else 1454 PULLUP_LEN(hlen, ulp, 1455 sizeof(struct sctphdr)); 1456 src_port = SCTP(ulp)->src_port; 1457 dst_port = SCTP(ulp)->dest_port; 1458 break; 1459 1460 case IPPROTO_UDP: 1461 PULLUP_TO(hlen, ulp, struct 
udphdr); 1462 dst_port = UDP(ulp)->uh_dport; 1463 src_port = UDP(ulp)->uh_sport; 1464 break; 1465 1466 case IPPROTO_HOPOPTS: /* RFC 2460 */ 1467 PULLUP_TO(hlen, ulp, struct ip6_hbh); 1468 ext_hd |= EXT_HOPOPTS; 1469 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; 1470 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; 1471 ulp = NULL; 1472 break; 1473 1474 case IPPROTO_ROUTING: /* RFC 2460 */ 1475 PULLUP_TO(hlen, ulp, struct ip6_rthdr); 1476 switch (((struct ip6_rthdr *)ulp)->ip6r_type) { 1477 case 0: 1478 ext_hd |= EXT_RTHDR0; 1479 break; 1480 case 2: 1481 ext_hd |= EXT_RTHDR2; 1482 break; 1483 default: 1484 if (V_fw_verbose) 1485 printf("IPFW2: IPV6 - Unknown " 1486 "Routing Header type(%d)\n", 1487 ((struct ip6_rthdr *) 1488 ulp)->ip6r_type); 1489 if (V_fw_deny_unknown_exthdrs) 1490 return (IP_FW_DENY); 1491 break; 1492 } 1493 ext_hd |= EXT_ROUTING; 1494 hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3; 1495 proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt; 1496 ulp = NULL; 1497 break; 1498 1499 case IPPROTO_FRAGMENT: /* RFC 2460 */ 1500 PULLUP_TO(hlen, ulp, struct ip6_frag); 1501 ext_hd |= EXT_FRAGMENT; 1502 hlen += sizeof (struct ip6_frag); 1503 proto = ((struct ip6_frag *)ulp)->ip6f_nxt; 1504 offset = ((struct ip6_frag *)ulp)->ip6f_offlg & 1505 IP6F_OFF_MASK; 1506 ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg & 1507 IP6F_MORE_FRAG; 1508 if (V_fw_permit_single_frag6 == 0 && 1509 offset == 0 && ip6f_mf == 0) { 1510 if (V_fw_verbose) 1511 printf("IPFW2: IPV6 - Invalid " 1512 "Fragment Header\n"); 1513 if (V_fw_deny_unknown_exthdrs) 1514 return (IP_FW_DENY); 1515 break; 1516 } 1517 args->f_id.extra = 1518 ntohl(((struct ip6_frag *)ulp)->ip6f_ident); 1519 ulp = NULL; 1520 break; 1521 1522 case IPPROTO_DSTOPTS: /* RFC 2460 */ 1523 PULLUP_TO(hlen, ulp, struct ip6_hbh); 1524 ext_hd |= EXT_DSTOPTS; 1525 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; 1526 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; 1527 ulp = NULL; 1528 break; 1529 1530 case IPPROTO_AH: /* RFC 
2402 */ 1531 PULLUP_TO(hlen, ulp, struct ip6_ext); 1532 ext_hd |= EXT_AH; 1533 hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2; 1534 proto = ((struct ip6_ext *)ulp)->ip6e_nxt; 1535 ulp = NULL; 1536 break; 1537 1538 case IPPROTO_ESP: /* RFC 2406 */ 1539 PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */ 1540 /* Anything past Seq# is variable length and 1541 * data past this ext. header is encrypted. */ 1542 ext_hd |= EXT_ESP; 1543 break; 1544 1545 case IPPROTO_NONE: /* RFC 2460 */ 1546 /* 1547 * Packet ends here, and IPv6 header has 1548 * already been pulled up. If ip6e_len!=0 1549 * then octets must be ignored. 1550 */ 1551 ulp = ip; /* non-NULL to get out of loop. */ 1552 break; 1553 1554 case IPPROTO_OSPFIGP: 1555 /* XXX OSPF header check? */ 1556 PULLUP_TO(hlen, ulp, struct ip6_ext); 1557 break; 1558 1559 case IPPROTO_PIM: 1560 /* XXX PIM header check? */ 1561 PULLUP_TO(hlen, ulp, struct pim); 1562 break; 1563 1564 case IPPROTO_GRE: /* RFC 1701 */ 1565 /* XXX GRE header check? */ 1566 PULLUP_TO(hlen, ulp, struct grehdr); 1567 break; 1568 1569 case IPPROTO_CARP: 1570 PULLUP_TO(hlen, ulp, struct carp_header); 1571 if (((struct carp_header *)ulp)->carp_version != 1572 CARP_VERSION) 1573 return (IP_FW_DENY); 1574 if (((struct carp_header *)ulp)->carp_type != 1575 CARP_ADVERTISEMENT) 1576 return (IP_FW_DENY); 1577 break; 1578 1579 case IPPROTO_IPV6: /* RFC 2893 */ 1580 PULLUP_TO(hlen, ulp, struct ip6_hdr); 1581 break; 1582 1583 case IPPROTO_IPV4: /* RFC 2893 */ 1584 PULLUP_TO(hlen, ulp, struct ip); 1585 break; 1586 1587 default: 1588 if (V_fw_verbose) 1589 printf("IPFW2: IPV6 - Unknown " 1590 "Extension Header(%d), ext_hd=%x\n", 1591 proto, ext_hd); 1592 if (V_fw_deny_unknown_exthdrs) 1593 return (IP_FW_DENY); 1594 PULLUP_TO(hlen, ulp, struct ip6_ext); 1595 break; 1596 } /*switch */ 1597 } 1598 ip = mtod(m, struct ip *); 1599 ip6 = (struct ip6_hdr *)ip; 1600 args->f_id.src_ip6 = ip6->ip6_src; 1601 args->f_id.dst_ip6 = ip6->ip6_dst; 1602 args->f_id.src_ip = 0; 1603 
args->f_id.dst_ip = 0; 1604 args->f_id.flow_id6 = ntohl(ip6->ip6_flow); 1605 iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6); 1606 } else if (pktlen >= sizeof(struct ip) && 1607 (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) { 1608 is_ipv4 = 1; 1609 hlen = ip->ip_hl << 2; 1610 args->f_id.addr_type = 4; 1611 1612 /* 1613 * Collect parameters into local variables for faster matching. 1614 */ 1615 proto = ip->ip_p; 1616 src_ip = ip->ip_src; 1617 dst_ip = ip->ip_dst; 1618 offset = ntohs(ip->ip_off) & IP_OFFMASK; 1619 iplen = ntohs(ip->ip_len); 1620 1621 if (offset == 0) { 1622 switch (proto) { 1623 case IPPROTO_TCP: 1624 PULLUP_TO(hlen, ulp, struct tcphdr); 1625 dst_port = TCP(ulp)->th_dport; 1626 src_port = TCP(ulp)->th_sport; 1627 /* save flags for dynamic rules */ 1628 args->f_id._flags = TCP(ulp)->th_flags; 1629 break; 1630 1631 case IPPROTO_SCTP: 1632 if (pktlen >= hlen + sizeof(struct sctphdr) + 1633 sizeof(struct sctp_chunkhdr) + 1634 offsetof(struct sctp_init, a_rwnd)) 1635 PULLUP_LEN(hlen, ulp, 1636 sizeof(struct sctphdr) + 1637 sizeof(struct sctp_chunkhdr) + 1638 offsetof(struct sctp_init, a_rwnd)); 1639 else if (pktlen >= hlen + sizeof(struct sctphdr)) 1640 PULLUP_LEN(hlen, ulp, pktlen - hlen); 1641 else 1642 PULLUP_LEN(hlen, ulp, 1643 sizeof(struct sctphdr)); 1644 src_port = SCTP(ulp)->src_port; 1645 dst_port = SCTP(ulp)->dest_port; 1646 break; 1647 1648 case IPPROTO_UDP: 1649 PULLUP_TO(hlen, ulp, struct udphdr); 1650 dst_port = UDP(ulp)->uh_dport; 1651 src_port = UDP(ulp)->uh_sport; 1652 break; 1653 1654 case IPPROTO_ICMP: 1655 PULLUP_TO(hlen, ulp, struct icmphdr); 1656 //args->f_id.flags = ICMP(ulp)->icmp_type; 1657 break; 1658 1659 default: 1660 break; 1661 } 1662 } 1663 1664 ip = mtod(m, struct ip *); 1665 args->f_id.src_ip = ntohl(src_ip.s_addr); 1666 args->f_id.dst_ip = ntohl(dst_ip.s_addr); 1667 } 1668 #undef PULLUP_TO 1669 pktlen = iplen < pktlen ? 
iplen: pktlen; 1670 if (proto) { /* we may have port numbers, store them */ 1671 args->f_id.proto = proto; 1672 args->f_id.src_port = src_port = ntohs(src_port); 1673 args->f_id.dst_port = dst_port = ntohs(dst_port); 1674 } 1675 1676 IPFW_PF_RLOCK(chain); 1677 if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ 1678 IPFW_PF_RUNLOCK(chain); 1679 return (IP_FW_PASS); /* accept */ 1680 } 1681 if (args->rule.slot) { 1682 /* 1683 * Packet has already been tagged as a result of a previous 1684 * match on rule args->rule aka args->rule_id (PIPE, QUEUE, 1685 * REASS, NETGRAPH, DIVERT/TEE...) 1686 * Validate the slot and continue from the next one 1687 * if still present, otherwise do a lookup. 1688 */ 1689 f_pos = (args->rule.chain_id == chain->id) ? 1690 args->rule.slot : 1691 ipfw_find_rule(chain, args->rule.rulenum, 1692 args->rule.rule_id); 1693 } else { 1694 f_pos = 0; 1695 } 1696 1697 /* 1698 * Now scan the rules, and parse microinstructions for each rule. 1699 * We have two nested loops and an inner switch. Sometimes we 1700 * need to break out of one or both loops, or re-enter one of 1701 * the loops with updated variables. Loop variables are: 1702 * 1703 * f_pos (outer loop) points to the current rule. 1704 * On output it points to the matching rule. 1705 * done (outer loop) is used as a flag to break the loop. 1706 * l (inner loop) residual length of current rule. 1707 * cmd points to the current microinstruction. 1708 * 1709 * We break the inner loop by setting l=0 and possibly 1710 * cmdlen=0 if we don't want to advance cmd. 1711 * We break the outer loop by setting done=1 1712 * We can restart the inner loop by setting l>0 and f_pos, f, cmd 1713 * as needed. 
1714 */ 1715 for (; f_pos < chain->n_rules; f_pos++) { 1716 ipfw_insn *cmd; 1717 uint32_t tablearg = 0; 1718 int l, cmdlen, skip_or; /* skip rest of OR block */ 1719 struct ip_fw *f; 1720 1721 f = chain->map[f_pos]; 1722 if (V_set_disable & (1 << f->set) ) 1723 continue; 1724 1725 skip_or = 0; 1726 for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; 1727 l -= cmdlen, cmd += cmdlen) { 1728 int match; 1729 1730 /* 1731 * check_body is a jump target used when we find a 1732 * CHECK_STATE, and need to jump to the body of 1733 * the target rule. 1734 */ 1735 1736 /* check_body: */ 1737 cmdlen = F_LEN(cmd); 1738 /* 1739 * An OR block (insn_1 || .. || insn_n) has the 1740 * F_OR bit set in all but the last instruction. 1741 * The first match will set "skip_or", and cause 1742 * the following instructions to be skipped until 1743 * past the one with the F_OR bit clear. 1744 */ 1745 if (skip_or) { /* skip this instruction */ 1746 if ((cmd->len & F_OR) == 0) 1747 skip_or = 0; /* next one is good */ 1748 continue; 1749 } 1750 match = 0; /* set to 1 if we succeed */ 1751 1752 switch (cmd->opcode) { 1753 /* 1754 * The first set of opcodes compares the packet's 1755 * fields with some pattern, setting 'match' if a 1756 * match is found. At the end of the loop there is 1757 * logic to deal with F_NOT and F_OR flags associated 1758 * with the opcode. 1759 */ 1760 case O_NOP: 1761 match = 1; 1762 break; 1763 1764 case O_FORWARD_MAC: 1765 printf("ipfw: opcode %d unimplemented\n", 1766 cmd->opcode); 1767 break; 1768 1769 case O_GID: 1770 case O_UID: 1771 case O_JAIL: 1772 /* 1773 * We only check offset == 0 && proto != 0, 1774 * as this ensures that we have a 1775 * packet with the ports info. 
1776 */ 1777 if (offset != 0) 1778 break; 1779 if (proto == IPPROTO_TCP || 1780 proto == IPPROTO_UDP) 1781 match = check_uidgid( 1782 (ipfw_insn_u32 *)cmd, 1783 args, &ucred_lookup, 1784 #ifdef __FreeBSD__ 1785 &ucred_cache); 1786 #else 1787 (void *)&ucred_cache); 1788 #endif 1789 break; 1790 1791 case O_RECV: 1792 match = iface_match(m->m_pkthdr.rcvif, 1793 (ipfw_insn_if *)cmd, chain, &tablearg); 1794 break; 1795 1796 case O_XMIT: 1797 match = iface_match(oif, (ipfw_insn_if *)cmd, 1798 chain, &tablearg); 1799 break; 1800 1801 case O_VIA: 1802 match = iface_match(oif ? oif : 1803 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd, 1804 chain, &tablearg); 1805 break; 1806 1807 case O_MACADDR2: 1808 if (args->eh != NULL) { /* have MAC header */ 1809 u_int32_t *want = (u_int32_t *) 1810 ((ipfw_insn_mac *)cmd)->addr; 1811 u_int32_t *mask = (u_int32_t *) 1812 ((ipfw_insn_mac *)cmd)->mask; 1813 u_int32_t *hdr = (u_int32_t *)args->eh; 1814 1815 match = 1816 ( want[0] == (hdr[0] & mask[0]) && 1817 want[1] == (hdr[1] & mask[1]) && 1818 want[2] == (hdr[2] & mask[2]) ); 1819 } 1820 break; 1821 1822 case O_MAC_TYPE: 1823 if (args->eh != NULL) { 1824 u_int16_t *p = 1825 ((ipfw_insn_u16 *)cmd)->ports; 1826 int i; 1827 1828 for (i = cmdlen - 1; !match && i>0; 1829 i--, p += 2) 1830 match = (etype >= p[0] && 1831 etype <= p[1]); 1832 } 1833 break; 1834 1835 case O_FRAG: 1836 match = (offset != 0); 1837 break; 1838 1839 case O_IN: /* "out" is "not in" */ 1840 match = (oif == NULL); 1841 break; 1842 1843 case O_LAYER2: 1844 match = (args->eh != NULL); 1845 break; 1846 1847 case O_DIVERTED: 1848 { 1849 /* For diverted packets, args->rule.info 1850 * contains the divert port (in host format) 1851 * reason and direction. 1852 */ 1853 uint32_t i = args->rule.info; 1854 match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT && 1855 cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2); 1856 } 1857 break; 1858 1859 case O_PROTO: 1860 /* 1861 * We do not allow an arg of 0 so the 1862 * check of "proto" only suffices. 
1863 */ 1864 match = (proto == cmd->arg1); 1865 break; 1866 1867 case O_IP_SRC: 1868 match = is_ipv4 && 1869 (((ipfw_insn_ip *)cmd)->addr.s_addr == 1870 src_ip.s_addr); 1871 break; 1872 1873 case O_IP_DST_LOOKUP: 1874 { 1875 void *pkey; 1876 uint32_t vidx, key; 1877 uint16_t keylen; 1878 1879 if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { 1880 /* Determine lookup key type */ 1881 vidx = ((ipfw_insn_u32 *)cmd)->d[1]; 1882 if (vidx != 4 /* uid */ && 1883 vidx != 5 /* jail */ && 1884 is_ipv6 == 0 && is_ipv4 == 0) 1885 break; 1886 /* Determine key length */ 1887 if (vidx == 0 /* dst-ip */ || 1888 vidx == 1 /* src-ip */) 1889 keylen = is_ipv6 ? 1890 sizeof(struct in6_addr): 1891 sizeof(in_addr_t); 1892 else { 1893 keylen = sizeof(key); 1894 pkey = &key; 1895 } 1896 if (vidx == 0 /* dst-ip */) 1897 pkey = is_ipv4 ? (void *)&dst_ip: 1898 (void *)&args->f_id.dst_ip6; 1899 else if (vidx == 1 /* src-ip */) 1900 pkey = is_ipv4 ? (void *)&src_ip: 1901 (void *)&args->f_id.src_ip6; 1902 else if (vidx == 6 /* dscp */) { 1903 if (is_ipv4) 1904 key = ip->ip_tos >> 2; 1905 else { 1906 key = args->f_id.flow_id6; 1907 key = (key & 0x0f) << 2 | 1908 (key & 0xf000) >> 14; 1909 } 1910 key &= 0x3f; 1911 } else if (vidx == 2 /* dst-port */ || 1912 vidx == 3 /* src-port */) { 1913 /* Skip fragments */ 1914 if (offset != 0) 1915 break; 1916 /* Skip proto without ports */ 1917 if (proto != IPPROTO_TCP && 1918 proto != IPPROTO_UDP && 1919 proto != IPPROTO_SCTP) 1920 break; 1921 if (vidx == 2 /* dst-port */) 1922 key = dst_port; 1923 else 1924 key = src_port; 1925 } 1926 #ifndef USERSPACE 1927 else if (vidx == 4 /* uid */ || 1928 vidx == 5 /* jail */) { 1929 check_uidgid( 1930 (ipfw_insn_u32 *)cmd, 1931 args, &ucred_lookup, 1932 #ifdef __FreeBSD__ 1933 &ucred_cache); 1934 if (vidx == 4 /* uid */) 1935 key = ucred_cache->cr_uid; 1936 else if (vidx == 5 /* jail */) 1937 key = ucred_cache->cr_prison->pr_id; 1938 #else /* !__FreeBSD__ */ 1939 (void *)&ucred_cache); 1940 if (vidx == 4 /* uid */) 1941 
key = ucred_cache.uid; 1942 else if (vidx == 5 /* jail */) 1943 key = ucred_cache.xid; 1944 #endif /* !__FreeBSD__ */ 1945 } 1946 #endif /* !USERSPACE */ 1947 else 1948 break; 1949 match = ipfw_lookup_table(chain, 1950 cmd->arg1, keylen, pkey, &vidx); 1951 if (!match) 1952 break; 1953 tablearg = vidx; 1954 break; 1955 } 1956 /* cmdlen =< F_INSN_SIZE(ipfw_insn_u32) */ 1957 /* FALLTHROUGH */ 1958 } 1959 case O_IP_SRC_LOOKUP: 1960 { 1961 void *pkey; 1962 uint32_t vidx; 1963 uint16_t keylen; 1964 1965 if (is_ipv4) { 1966 keylen = sizeof(in_addr_t); 1967 if (cmd->opcode == O_IP_DST_LOOKUP) 1968 pkey = &dst_ip; 1969 else 1970 pkey = &src_ip; 1971 } else if (is_ipv6) { 1972 keylen = sizeof(struct in6_addr); 1973 if (cmd->opcode == O_IP_DST_LOOKUP) 1974 pkey = &args->f_id.dst_ip6; 1975 else 1976 pkey = &args->f_id.src_ip6; 1977 } else 1978 break; 1979 match = ipfw_lookup_table(chain, cmd->arg1, 1980 keylen, pkey, &vidx); 1981 if (!match) 1982 break; 1983 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) { 1984 match = ((ipfw_insn_u32 *)cmd)->d[0] == 1985 TARG_VAL(chain, vidx, tag); 1986 if (!match) 1987 break; 1988 } 1989 tablearg = vidx; 1990 break; 1991 } 1992 1993 case O_IP_FLOW_LOOKUP: 1994 { 1995 uint32_t v = 0; 1996 match = ipfw_lookup_table(chain, 1997 cmd->arg1, 0, &args->f_id, &v); 1998 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) 1999 match = ((ipfw_insn_u32 *)cmd)->d[0] == 2000 TARG_VAL(chain, v, tag); 2001 if (match) 2002 tablearg = v; 2003 } 2004 break; 2005 case O_IP_SRC_MASK: 2006 case O_IP_DST_MASK: 2007 if (is_ipv4) { 2008 uint32_t a = 2009 (cmd->opcode == O_IP_DST_MASK) ? 
2010 dst_ip.s_addr : src_ip.s_addr; 2011 uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; 2012 int i = cmdlen-1; 2013 2014 for (; !match && i>0; i-= 2, p+= 2) 2015 match = (p[0] == (a & p[1])); 2016 } 2017 break; 2018 2019 case O_IP_SRC_ME: 2020 if (is_ipv4) { 2021 match = in_localip(src_ip); 2022 break; 2023 } 2024 #ifdef INET6 2025 /* FALLTHROUGH */ 2026 case O_IP6_SRC_ME: 2027 match= is_ipv6 && ipfw_localip6(&args->f_id.src_ip6); 2028 #endif 2029 break; 2030 2031 case O_IP_DST_SET: 2032 case O_IP_SRC_SET: 2033 if (is_ipv4) { 2034 u_int32_t *d = (u_int32_t *)(cmd+1); 2035 u_int32_t addr = 2036 cmd->opcode == O_IP_DST_SET ? 2037 args->f_id.dst_ip : 2038 args->f_id.src_ip; 2039 2040 if (addr < d[0]) 2041 break; 2042 addr -= d[0]; /* subtract base */ 2043 match = (addr < cmd->arg1) && 2044 ( d[ 1 + (addr>>5)] & 2045 (1<<(addr & 0x1f)) ); 2046 } 2047 break; 2048 2049 case O_IP_DST: 2050 match = is_ipv4 && 2051 (((ipfw_insn_ip *)cmd)->addr.s_addr == 2052 dst_ip.s_addr); 2053 break; 2054 2055 case O_IP_DST_ME: 2056 if (is_ipv4) { 2057 match = in_localip(dst_ip); 2058 break; 2059 } 2060 #ifdef INET6 2061 /* FALLTHROUGH */ 2062 case O_IP6_DST_ME: 2063 match= is_ipv6 && ipfw_localip6(&args->f_id.dst_ip6); 2064 #endif 2065 break; 2066 2067 2068 case O_IP_SRCPORT: 2069 case O_IP_DSTPORT: 2070 /* 2071 * offset == 0 && proto != 0 is enough 2072 * to guarantee that we have a 2073 * packet with port info. 2074 */ 2075 if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP || 2076 proto==IPPROTO_SCTP) && offset == 0) { 2077 u_int16_t x = 2078 (cmd->opcode == O_IP_SRCPORT) ? 
2079 src_port : dst_port ; 2080 u_int16_t *p = 2081 ((ipfw_insn_u16 *)cmd)->ports; 2082 int i; 2083 2084 for (i = cmdlen - 1; !match && i>0; 2085 i--, p += 2) 2086 match = (x>=p[0] && x<=p[1]); 2087 } 2088 break; 2089 2090 case O_ICMPTYPE: 2091 match = (offset == 0 && proto==IPPROTO_ICMP && 2092 icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); 2093 break; 2094 2095 #ifdef INET6 2096 case O_ICMP6TYPE: 2097 match = is_ipv6 && offset == 0 && 2098 proto==IPPROTO_ICMPV6 && 2099 icmp6type_match( 2100 ICMP6(ulp)->icmp6_type, 2101 (ipfw_insn_u32 *)cmd); 2102 break; 2103 #endif /* INET6 */ 2104 2105 case O_IPOPT: 2106 match = (is_ipv4 && 2107 ipopts_match(ip, cmd) ); 2108 break; 2109 2110 case O_IPVER: 2111 match = (is_ipv4 && 2112 cmd->arg1 == ip->ip_v); 2113 break; 2114 2115 case O_IPID: 2116 case O_IPLEN: 2117 case O_IPTTL: 2118 if (is_ipv4) { /* only for IP packets */ 2119 uint16_t x; 2120 uint16_t *p; 2121 int i; 2122 2123 if (cmd->opcode == O_IPLEN) 2124 x = iplen; 2125 else if (cmd->opcode == O_IPTTL) 2126 x = ip->ip_ttl; 2127 else /* must be IPID */ 2128 x = ntohs(ip->ip_id); 2129 if (cmdlen == 1) { 2130 match = (cmd->arg1 == x); 2131 break; 2132 } 2133 /* otherwise we have ranges */ 2134 p = ((ipfw_insn_u16 *)cmd)->ports; 2135 i = cmdlen - 1; 2136 for (; !match && i>0; i--, p += 2) 2137 match = (x >= p[0] && x <= p[1]); 2138 } 2139 break; 2140 2141 case O_IPPRECEDENCE: 2142 match = (is_ipv4 && 2143 (cmd->arg1 == (ip->ip_tos & 0xe0)) ); 2144 break; 2145 2146 case O_IPTOS: 2147 match = (is_ipv4 && 2148 flags_match(cmd, ip->ip_tos)); 2149 break; 2150 2151 case O_DSCP: 2152 { 2153 uint32_t *p; 2154 uint16_t x; 2155 2156 p = ((ipfw_insn_u32 *)cmd)->d; 2157 2158 if (is_ipv4) 2159 x = ip->ip_tos >> 2; 2160 else if (is_ipv6) { 2161 uint8_t *v; 2162 v = &((struct ip6_hdr *)ip)->ip6_vfc; 2163 x = (*v & 0x0F) << 2; 2164 v++; 2165 x |= *v >> 6; 2166 } else 2167 break; 2168 2169 /* DSCP bitmask is stored as low_u32 high_u32 */ 2170 if (x >= 32) 2171 match = *(p + 1) & (1 << 
(x - 32)); 2172 else 2173 match = *p & (1 << x); 2174 } 2175 break; 2176 2177 case O_TCPDATALEN: 2178 if (proto == IPPROTO_TCP && offset == 0) { 2179 struct tcphdr *tcp; 2180 uint16_t x; 2181 uint16_t *p; 2182 int i; 2183 #ifdef INET6 2184 if (is_ipv6) { 2185 struct ip6_hdr *ip6; 2186 2187 ip6 = (struct ip6_hdr *)ip; 2188 if (ip6->ip6_plen == 0) { 2189 /* 2190 * Jumbo payload is not 2191 * supported by this 2192 * opcode. 2193 */ 2194 break; 2195 } 2196 x = iplen - hlen; 2197 } else 2198 #endif /* INET6 */ 2199 x = iplen - (ip->ip_hl << 2); 2200 tcp = TCP(ulp); 2201 x -= tcp->th_off << 2; 2202 if (cmdlen == 1) { 2203 match = (cmd->arg1 == x); 2204 break; 2205 } 2206 /* otherwise we have ranges */ 2207 p = ((ipfw_insn_u16 *)cmd)->ports; 2208 i = cmdlen - 1; 2209 for (; !match && i>0; i--, p += 2) 2210 match = (x >= p[0] && x <= p[1]); 2211 } 2212 break; 2213 2214 case O_TCPFLAGS: 2215 match = (proto == IPPROTO_TCP && offset == 0 && 2216 flags_match(cmd, TCP(ulp)->th_flags)); 2217 break; 2218 2219 case O_TCPOPTS: 2220 if (proto == IPPROTO_TCP && offset == 0 && ulp){ 2221 PULLUP_LEN(hlen, ulp, 2222 (TCP(ulp)->th_off << 2)); 2223 match = tcpopts_match(TCP(ulp), cmd); 2224 } 2225 break; 2226 2227 case O_TCPSEQ: 2228 match = (proto == IPPROTO_TCP && offset == 0 && 2229 ((ipfw_insn_u32 *)cmd)->d[0] == 2230 TCP(ulp)->th_seq); 2231 break; 2232 2233 case O_TCPACK: 2234 match = (proto == IPPROTO_TCP && offset == 0 && 2235 ((ipfw_insn_u32 *)cmd)->d[0] == 2236 TCP(ulp)->th_ack); 2237 break; 2238 2239 case O_TCPWIN: 2240 if (proto == IPPROTO_TCP && offset == 0) { 2241 uint16_t x; 2242 uint16_t *p; 2243 int i; 2244 2245 x = ntohs(TCP(ulp)->th_win); 2246 if (cmdlen == 1) { 2247 match = (cmd->arg1 == x); 2248 break; 2249 } 2250 /* Otherwise we have ranges. 
*/ 2251 p = ((ipfw_insn_u16 *)cmd)->ports; 2252 i = cmdlen - 1; 2253 for (; !match && i > 0; i--, p += 2) 2254 match = (x >= p[0] && x <= p[1]); 2255 } 2256 break; 2257 2258 case O_ESTAB: 2259 /* reject packets which have SYN only */ 2260 /* XXX should i also check for TH_ACK ? */ 2261 match = (proto == IPPROTO_TCP && offset == 0 && 2262 (TCP(ulp)->th_flags & 2263 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 2264 break; 2265 2266 case O_ALTQ: { 2267 struct pf_mtag *at; 2268 struct m_tag *mtag; 2269 ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; 2270 2271 /* 2272 * ALTQ uses mbuf tags from another 2273 * packet filtering system - pf(4). 2274 * We allocate a tag in its format 2275 * and fill it in, pretending to be pf(4). 2276 */ 2277 match = 1; 2278 at = pf_find_mtag(m); 2279 if (at != NULL && at->qid != 0) 2280 break; 2281 mtag = m_tag_get(PACKET_TAG_PF, 2282 sizeof(struct pf_mtag), M_NOWAIT | M_ZERO); 2283 if (mtag == NULL) { 2284 /* 2285 * Let the packet fall back to the 2286 * default ALTQ. 2287 */ 2288 break; 2289 } 2290 m_tag_prepend(m, mtag); 2291 at = (struct pf_mtag *)(mtag + 1); 2292 at->qid = altq->qid; 2293 at->hdr = ip; 2294 break; 2295 } 2296 2297 case O_LOG: 2298 ipfw_log(chain, f, hlen, args, m, 2299 oif, offset | ip6f_mf, tablearg, ip); 2300 match = 1; 2301 break; 2302 2303 case O_PROB: 2304 match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); 2305 break; 2306 2307 case O_VERREVPATH: 2308 /* Outgoing packets automatically pass/match */ 2309 match = ((oif != NULL) || 2310 (m->m_pkthdr.rcvif == NULL) || 2311 ( 2312 #ifdef INET6 2313 is_ipv6 ? 2314 verify_path6(&(args->f_id.src_ip6), 2315 m->m_pkthdr.rcvif, args->f_id.fib) : 2316 #endif 2317 verify_path(src_ip, m->m_pkthdr.rcvif, 2318 args->f_id.fib))); 2319 break; 2320 2321 case O_VERSRCREACH: 2322 /* Outgoing packets automatically pass/match */ 2323 match = (hlen > 0 && ((oif != NULL) || 2324 #ifdef INET6 2325 is_ipv6 ? 
2326 verify_path6(&(args->f_id.src_ip6), 2327 NULL, args->f_id.fib) : 2328 #endif 2329 verify_path(src_ip, NULL, args->f_id.fib))); 2330 break; 2331 2332 case O_ANTISPOOF: 2333 /* Outgoing packets automatically pass/match */ 2334 if (oif == NULL && hlen > 0 && 2335 ( (is_ipv4 && in_localaddr(src_ip)) 2336 #ifdef INET6 2337 || (is_ipv6 && 2338 in6_localaddr(&(args->f_id.src_ip6))) 2339 #endif 2340 )) 2341 match = 2342 #ifdef INET6 2343 is_ipv6 ? verify_path6( 2344 &(args->f_id.src_ip6), 2345 m->m_pkthdr.rcvif, 2346 args->f_id.fib) : 2347 #endif 2348 verify_path(src_ip, 2349 m->m_pkthdr.rcvif, 2350 args->f_id.fib); 2351 else 2352 match = 1; 2353 break; 2354 2355 case O_IPSEC: 2356 match = (m_tag_find(m, 2357 PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); 2358 /* otherwise no match */ 2359 break; 2360 2361 #ifdef INET6 2362 case O_IP6_SRC: 2363 match = is_ipv6 && 2364 IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6, 2365 &((ipfw_insn_ip6 *)cmd)->addr6); 2366 break; 2367 2368 case O_IP6_DST: 2369 match = is_ipv6 && 2370 IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6, 2371 &((ipfw_insn_ip6 *)cmd)->addr6); 2372 break; 2373 case O_IP6_SRC_MASK: 2374 case O_IP6_DST_MASK: 2375 if (is_ipv6) { 2376 int i = cmdlen - 1; 2377 struct in6_addr p; 2378 struct in6_addr *d = 2379 &((ipfw_insn_ip6 *)cmd)->addr6; 2380 2381 for (; !match && i > 0; d += 2, 2382 i -= F_INSN_SIZE(struct in6_addr) 2383 * 2) { 2384 p = (cmd->opcode == 2385 O_IP6_SRC_MASK) ? 
2386 args->f_id.src_ip6: 2387 args->f_id.dst_ip6; 2388 APPLY_MASK(&p, &d[1]); 2389 match = 2390 IN6_ARE_ADDR_EQUAL(&d[0], 2391 &p); 2392 } 2393 } 2394 break; 2395 2396 case O_FLOW6ID: 2397 match = is_ipv6 && 2398 flow6id_match(args->f_id.flow_id6, 2399 (ipfw_insn_u32 *) cmd); 2400 break; 2401 2402 case O_EXT_HDR: 2403 match = is_ipv6 && 2404 (ext_hd & ((ipfw_insn *) cmd)->arg1); 2405 break; 2406 2407 case O_IP6: 2408 match = is_ipv6; 2409 break; 2410 #endif 2411 2412 case O_IP4: 2413 match = is_ipv4; 2414 break; 2415 2416 case O_TAG: { 2417 struct m_tag *mtag; 2418 uint32_t tag = TARG(cmd->arg1, tag); 2419 2420 /* Packet is already tagged with this tag? */ 2421 mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); 2422 2423 /* We have `untag' action when F_NOT flag is 2424 * present. And we must remove this mtag from 2425 * mbuf and reset `match' to zero (`match' will 2426 * be inversed later). 2427 * Otherwise we should allocate new mtag and 2428 * push it into mbuf. 2429 */ 2430 if (cmd->len & F_NOT) { /* `untag' action */ 2431 if (mtag != NULL) 2432 m_tag_delete(m, mtag); 2433 match = 0; 2434 } else { 2435 if (mtag == NULL) { 2436 mtag = m_tag_alloc( MTAG_IPFW, 2437 tag, 0, M_NOWAIT); 2438 if (mtag != NULL) 2439 m_tag_prepend(m, mtag); 2440 } 2441 match = 1; 2442 } 2443 break; 2444 } 2445 2446 case O_FIB: /* try match the specified fib */ 2447 if (args->f_id.fib == cmd->arg1) 2448 match = 1; 2449 break; 2450 2451 case O_SOCKARG: { 2452 #ifndef USERSPACE /* not supported in userspace */ 2453 struct inpcb *inp = args->inp; 2454 struct inpcbinfo *pi; 2455 2456 if (is_ipv6) /* XXX can we remove this ? */ 2457 break; 2458 2459 if (proto == IPPROTO_TCP) 2460 pi = &V_tcbinfo; 2461 else if (proto == IPPROTO_UDP) 2462 pi = &V_udbinfo; 2463 else 2464 break; 2465 2466 /* 2467 * XXXRW: so_user_cookie should almost 2468 * certainly be inp_user_cookie? 
2469 */ 2470 2471 /* For incoming packet, lookup up the 2472 inpcb using the src/dest ip/port tuple */ 2473 if (inp == NULL) { 2474 inp = in_pcblookup(pi, 2475 src_ip, htons(src_port), 2476 dst_ip, htons(dst_port), 2477 INPLOOKUP_RLOCKPCB, NULL); 2478 if (inp != NULL) { 2479 tablearg = 2480 inp->inp_socket->so_user_cookie; 2481 if (tablearg) 2482 match = 1; 2483 INP_RUNLOCK(inp); 2484 } 2485 } else { 2486 if (inp->inp_socket) { 2487 tablearg = 2488 inp->inp_socket->so_user_cookie; 2489 if (tablearg) 2490 match = 1; 2491 } 2492 } 2493 #endif /* !USERSPACE */ 2494 break; 2495 } 2496 2497 case O_TAGGED: { 2498 struct m_tag *mtag; 2499 uint32_t tag = TARG(cmd->arg1, tag); 2500 2501 if (cmdlen == 1) { 2502 match = m_tag_locate(m, MTAG_IPFW, 2503 tag, NULL) != NULL; 2504 break; 2505 } 2506 2507 /* we have ranges */ 2508 for (mtag = m_tag_first(m); 2509 mtag != NULL && !match; 2510 mtag = m_tag_next(m, mtag)) { 2511 uint16_t *p; 2512 int i; 2513 2514 if (mtag->m_tag_cookie != MTAG_IPFW) 2515 continue; 2516 2517 p = ((ipfw_insn_u16 *)cmd)->ports; 2518 i = cmdlen - 1; 2519 for(; !match && i > 0; i--, p += 2) 2520 match = 2521 mtag->m_tag_id >= p[0] && 2522 mtag->m_tag_id <= p[1]; 2523 } 2524 break; 2525 } 2526 2527 /* 2528 * The second set of opcodes represents 'actions', 2529 * i.e. the terminal part of a rule once the packet 2530 * matches all previous patterns. 2531 * Typically there is only one action for each rule, 2532 * and the opcode is stored at the end of the rule 2533 * (but there are exceptions -- see below). 2534 * 2535 * In general, here we set retval and terminate the 2536 * outer loop (would be a 'break 3' in some language, 2537 * but we need to set l=0, done=1) 2538 * 2539 * Exceptions: 2540 * O_COUNT and O_SKIPTO actions: 2541 * instead of terminating, we jump to the next rule 2542 * (setting l=0), or to the SKIPTO target (setting 2543 * f/f_len, cmd and l as needed), respectively. 
2544 * 2545 * O_TAG, O_LOG and O_ALTQ action parameters: 2546 * perform some action and set match = 1; 2547 * 2548 * O_LIMIT and O_KEEP_STATE: these opcodes are 2549 * not real 'actions', and are stored right 2550 * before the 'action' part of the rule. 2551 * These opcodes try to install an entry in the 2552 * state tables; if successful, we continue with 2553 * the next opcode (match=1; break;), otherwise 2554 * the packet must be dropped (set retval, 2555 * break loops with l=0, done=1) 2556 * 2557 * O_PROBE_STATE and O_CHECK_STATE: these opcodes 2558 * cause a lookup of the state table, and a jump 2559 * to the 'action' part of the parent rule 2560 * if an entry is found, or 2561 * (CHECK_STATE only) a jump to the next rule if 2562 * the entry is not found. 2563 * The result of the lookup is cached so that 2564 * further instances of these opcodes become NOPs. 2565 * The jump to the next rule is done by setting 2566 * l=0, cmdlen=0. 2567 */ 2568 case O_LIMIT: 2569 case O_KEEP_STATE: 2570 if (ipfw_dyn_install_state(chain, f, 2571 (ipfw_insn_limit *)cmd, args, tablearg)) { 2572 /* error or limit violation */ 2573 retval = IP_FW_DENY; 2574 l = 0; /* exit inner loop */ 2575 done = 1; /* exit outer loop */ 2576 } 2577 match = 1; 2578 break; 2579 2580 case O_PROBE_STATE: 2581 case O_CHECK_STATE: 2582 /* 2583 * dynamic rules are checked at the first 2584 * keep-state or check-state occurrence, 2585 * with the result being stored in dyn_dir 2586 * and dyn_name. 2587 * The compiler introduces a PROBE_STATE 2588 * instruction for us when we have a 2589 * KEEP_STATE (because PROBE_STATE needs 2590 * to be run first). 2591 * 2592 * (dyn_dir == MATCH_UNKNOWN) means this is 2593 * first lookup for such f_id. Do lookup. 2594 * 2595 * (dyn_dir != MATCH_UNKNOWN && 2596 * dyn_name != 0 && dyn_name != cmd->arg1) 2597 * means previous lookup didn't find dynamic 2598 * rule for specific state name and current 2599 * lookup will search rule with another state 2600 * name. 
Redo lookup. 2601 * 2602 * (dyn_dir != MATCH_UNKNOWN && dyn_name == 0) 2603 * means previous lookup was for `any' name 2604 * and it didn't find rule. No need to do 2605 * lookup again. 2606 */ 2607 if ((dyn_dir == MATCH_UNKNOWN || 2608 (dyn_name != 0 && 2609 dyn_name != cmd->arg1)) && 2610 (q = ipfw_dyn_lookup_state(&args->f_id, 2611 ulp, pktlen, &dyn_dir, 2612 (dyn_name = cmd->arg1))) != NULL) { 2613 /* 2614 * Found dynamic entry, jump to the 2615 * 'action' part of the parent rule 2616 * by setting f, cmd, l and clearing 2617 * cmdlen. 2618 */ 2619 f = q; 2620 /* XXX we would like to have f_pos 2621 * readily accessible in the dynamic 2622 * rule, instead of having to 2623 * lookup q->rule. 2624 */ 2625 f_pos = ipfw_find_rule(chain, 2626 f->rulenum, f->id); 2627 cmd = ACTION_PTR(f); 2628 l = f->cmd_len - f->act_ofs; 2629 cmdlen = 0; 2630 match = 1; 2631 break; 2632 } 2633 /* 2634 * Dynamic entry not found. If CHECK_STATE, 2635 * skip to next rule, if PROBE_STATE just 2636 * ignore and continue with next opcode. 2637 */ 2638 if (cmd->opcode == O_CHECK_STATE) 2639 l = 0; /* exit inner loop */ 2640 match = 1; 2641 break; 2642 2643 case O_ACCEPT: 2644 retval = 0; /* accept */ 2645 l = 0; /* exit inner loop */ 2646 done = 1; /* exit outer loop */ 2647 break; 2648 2649 case O_PIPE: 2650 case O_QUEUE: 2651 set_match(args, f_pos, chain); 2652 args->rule.info = TARG(cmd->arg1, pipe); 2653 if (cmd->opcode == O_PIPE) 2654 args->rule.info |= IPFW_IS_PIPE; 2655 if (V_fw_one_pass) 2656 args->rule.info |= IPFW_ONEPASS; 2657 retval = IP_FW_DUMMYNET; 2658 l = 0; /* exit inner loop */ 2659 done = 1; /* exit outer loop */ 2660 break; 2661 2662 case O_DIVERT: 2663 case O_TEE: 2664 if (args->eh) /* not on layer 2 */ 2665 break; 2666 /* otherwise this is terminal */ 2667 l = 0; /* exit inner loop */ 2668 done = 1; /* exit outer loop */ 2669 retval = (cmd->opcode == O_DIVERT) ? 
2670 IP_FW_DIVERT : IP_FW_TEE; 2671 set_match(args, f_pos, chain); 2672 args->rule.info = TARG(cmd->arg1, divert); 2673 break; 2674 2675 case O_COUNT: 2676 IPFW_INC_RULE_COUNTER(f, pktlen); 2677 l = 0; /* exit inner loop */ 2678 break; 2679 2680 case O_SKIPTO: 2681 IPFW_INC_RULE_COUNTER(f, pktlen); 2682 f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0); 2683 /* 2684 * Skip disabled rules, and re-enter 2685 * the inner loop with the correct 2686 * f_pos, f, l and cmd. 2687 * Also clear cmdlen and skip_or 2688 */ 2689 for (; f_pos < chain->n_rules - 1 && 2690 (V_set_disable & 2691 (1 << chain->map[f_pos]->set)); 2692 f_pos++) 2693 ; 2694 /* Re-enter the inner loop at the skipto rule. */ 2695 f = chain->map[f_pos]; 2696 l = f->cmd_len; 2697 cmd = f->cmd; 2698 match = 1; 2699 cmdlen = 0; 2700 skip_or = 0; 2701 continue; 2702 break; /* not reached */ 2703 2704 case O_CALLRETURN: { 2705 /* 2706 * Implementation of `subroutine' call/return, 2707 * in the stack carried in an mbuf tag. This 2708 * is different from `skipto' in that any call 2709 * address is possible (`skipto' must prevent 2710 * backward jumps to avoid endless loops). 2711 * We have `return' action when F_NOT flag is 2712 * present. The `m_tag_id' field is used as 2713 * stack pointer. 2714 */ 2715 struct m_tag *mtag; 2716 uint16_t jmpto, *stack; 2717 2718 #define IS_CALL ((cmd->len & F_NOT) == 0) 2719 #define IS_RETURN ((cmd->len & F_NOT) != 0) 2720 /* 2721 * Hand-rolled version of m_tag_locate() with 2722 * wildcard `type'. 2723 * If not already tagged, allocate new tag. 
2724 */ 2725 mtag = m_tag_first(m); 2726 while (mtag != NULL) { 2727 if (mtag->m_tag_cookie == 2728 MTAG_IPFW_CALL) 2729 break; 2730 mtag = m_tag_next(m, mtag); 2731 } 2732 if (mtag == NULL && IS_CALL) { 2733 mtag = m_tag_alloc(MTAG_IPFW_CALL, 0, 2734 IPFW_CALLSTACK_SIZE * 2735 sizeof(uint16_t), M_NOWAIT); 2736 if (mtag != NULL) 2737 m_tag_prepend(m, mtag); 2738 } 2739 2740 /* 2741 * On error both `call' and `return' just 2742 * continue with next rule. 2743 */ 2744 if (IS_RETURN && (mtag == NULL || 2745 mtag->m_tag_id == 0)) { 2746 l = 0; /* exit inner loop */ 2747 break; 2748 } 2749 if (IS_CALL && (mtag == NULL || 2750 mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) { 2751 printf("ipfw: call stack error, " 2752 "go to next rule\n"); 2753 l = 0; /* exit inner loop */ 2754 break; 2755 } 2756 2757 IPFW_INC_RULE_COUNTER(f, pktlen); 2758 stack = (uint16_t *)(mtag + 1); 2759 2760 /* 2761 * The `call' action may use cached f_pos 2762 * (in f->next_rule), whose version is written 2763 * in f->next_rule. 2764 * The `return' action, however, doesn't have 2765 * fixed jump address in cmd->arg1 and can't use 2766 * cache. 2767 */ 2768 if (IS_CALL) { 2769 stack[mtag->m_tag_id] = f->rulenum; 2770 mtag->m_tag_id++; 2771 f_pos = JUMP(chain, f, cmd->arg1, 2772 tablearg, 1); 2773 } else { /* `return' action */ 2774 mtag->m_tag_id--; 2775 jmpto = stack[mtag->m_tag_id] + 1; 2776 f_pos = ipfw_find_rule(chain, jmpto, 0); 2777 } 2778 2779 /* 2780 * Skip disabled rules, and re-enter 2781 * the inner loop with the correct 2782 * f_pos, f, l and cmd. 2783 * Also clear cmdlen and skip_or 2784 */ 2785 for (; f_pos < chain->n_rules - 1 && 2786 (V_set_disable & 2787 (1 << chain->map[f_pos]->set)); f_pos++) 2788 ; 2789 /* Re-enter the inner loop at the dest rule. 
*/ 2790 f = chain->map[f_pos]; 2791 l = f->cmd_len; 2792 cmd = f->cmd; 2793 cmdlen = 0; 2794 skip_or = 0; 2795 continue; 2796 break; /* NOTREACHED */ 2797 } 2798 #undef IS_CALL 2799 #undef IS_RETURN 2800 2801 case O_REJECT: 2802 /* 2803 * Drop the packet and send a reject notice 2804 * if the packet is not ICMP (or is an ICMP 2805 * query), and it is not multicast/broadcast. 2806 */ 2807 if (hlen > 0 && is_ipv4 && offset == 0 && 2808 (proto != IPPROTO_ICMP || 2809 is_icmp_query(ICMP(ulp))) && 2810 !(m->m_flags & (M_BCAST|M_MCAST)) && 2811 !IN_MULTICAST(ntohl(dst_ip.s_addr))) { 2812 send_reject(args, cmd->arg1, iplen, ip); 2813 m = args->m; 2814 } 2815 /* FALLTHROUGH */ 2816 #ifdef INET6 2817 case O_UNREACH6: 2818 if (hlen > 0 && is_ipv6 && 2819 ((offset & IP6F_OFF_MASK) == 0) && 2820 (proto != IPPROTO_ICMPV6 || 2821 (is_icmp6_query(icmp6_type) == 1)) && 2822 !(m->m_flags & (M_BCAST|M_MCAST)) && 2823 !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) { 2824 send_reject6( 2825 args, cmd->arg1, hlen, 2826 (struct ip6_hdr *)ip); 2827 m = args->m; 2828 } 2829 /* FALLTHROUGH */ 2830 #endif 2831 case O_DENY: 2832 retval = IP_FW_DENY; 2833 l = 0; /* exit inner loop */ 2834 done = 1; /* exit outer loop */ 2835 break; 2836 2837 case O_FORWARD_IP: 2838 if (args->eh) /* not valid on layer2 pkts */ 2839 break; 2840 if (q != f || dyn_dir == MATCH_FORWARD) { 2841 struct sockaddr_in *sa; 2842 2843 sa = &(((ipfw_insn_sa *)cmd)->sa); 2844 if (sa->sin_addr.s_addr == INADDR_ANY) { 2845 #ifdef INET6 2846 /* 2847 * We use O_FORWARD_IP opcode for 2848 * fwd rule with tablearg, but tables 2849 * now support IPv6 addresses. And 2850 * when we are inspecting IPv6 packet, 2851 * we can use nh6 field from 2852 * table_value as next_hop6 address. 
2853 */ 2854 if (is_ipv6) { 2855 struct sockaddr_in6 *sa6; 2856 2857 sa6 = args->next_hop6 = 2858 &args->hopstore6; 2859 sa6->sin6_family = AF_INET6; 2860 sa6->sin6_len = sizeof(*sa6); 2861 sa6->sin6_addr = TARG_VAL( 2862 chain, tablearg, nh6); 2863 sa6->sin6_port = sa->sin_port; 2864 /* 2865 * Set sin6_scope_id only for 2866 * link-local unicast addresses. 2867 */ 2868 if (IN6_IS_ADDR_LINKLOCAL( 2869 &sa6->sin6_addr)) 2870 sa6->sin6_scope_id = 2871 TARG_VAL(chain, 2872 tablearg, 2873 zoneid); 2874 } else 2875 #endif 2876 { 2877 args->hopstore.sin_port = 2878 sa->sin_port; 2879 sa = args->next_hop = 2880 &args->hopstore; 2881 sa->sin_family = AF_INET; 2882 sa->sin_len = sizeof(*sa); 2883 sa->sin_addr.s_addr = htonl( 2884 TARG_VAL(chain, tablearg, 2885 nh4)); 2886 } 2887 } else { 2888 args->next_hop = sa; 2889 } 2890 } 2891 retval = IP_FW_PASS; 2892 l = 0; /* exit inner loop */ 2893 done = 1; /* exit outer loop */ 2894 break; 2895 2896 #ifdef INET6 2897 case O_FORWARD_IP6: 2898 if (args->eh) /* not valid on layer2 pkts */ 2899 break; 2900 if (q != f || dyn_dir == MATCH_FORWARD) { 2901 struct sockaddr_in6 *sin6; 2902 2903 sin6 = &(((ipfw_insn_sa6 *)cmd)->sa); 2904 args->next_hop6 = sin6; 2905 } 2906 retval = IP_FW_PASS; 2907 l = 0; /* exit inner loop */ 2908 done = 1; /* exit outer loop */ 2909 break; 2910 #endif 2911 2912 case O_NETGRAPH: 2913 case O_NGTEE: 2914 set_match(args, f_pos, chain); 2915 args->rule.info = TARG(cmd->arg1, netgraph); 2916 if (V_fw_one_pass) 2917 args->rule.info |= IPFW_ONEPASS; 2918 retval = (cmd->opcode == O_NETGRAPH) ? 
2919 IP_FW_NETGRAPH : IP_FW_NGTEE; 2920 l = 0; /* exit inner loop */ 2921 done = 1; /* exit outer loop */ 2922 break; 2923 2924 case O_SETFIB: { 2925 uint32_t fib; 2926 2927 IPFW_INC_RULE_COUNTER(f, pktlen); 2928 fib = TARG(cmd->arg1, fib) & 0x7FFF; 2929 if (fib >= rt_numfibs) 2930 fib = 0; 2931 M_SETFIB(m, fib); 2932 args->f_id.fib = fib; 2933 l = 0; /* exit inner loop */ 2934 break; 2935 } 2936 2937 case O_SETDSCP: { 2938 uint16_t code; 2939 2940 code = TARG(cmd->arg1, dscp) & 0x3F; 2941 l = 0; /* exit inner loop */ 2942 if (is_ipv4) { 2943 uint16_t old; 2944 2945 old = *(uint16_t *)ip; 2946 ip->ip_tos = (code << 2) | 2947 (ip->ip_tos & 0x03); 2948 ip->ip_sum = cksum_adjust(ip->ip_sum, 2949 old, *(uint16_t *)ip); 2950 } else if (is_ipv6) { 2951 uint8_t *v; 2952 2953 v = &((struct ip6_hdr *)ip)->ip6_vfc; 2954 *v = (*v & 0xF0) | (code >> 2); 2955 v++; 2956 *v = (*v & 0x3F) | ((code & 0x03) << 6); 2957 } else 2958 break; 2959 2960 IPFW_INC_RULE_COUNTER(f, pktlen); 2961 break; 2962 } 2963 2964 case O_NAT: 2965 l = 0; /* exit inner loop */ 2966 done = 1; /* exit outer loop */ 2967 /* 2968 * Ensure that we do not invoke NAT handler for 2969 * non IPv4 packets. Libalias expects only IPv4. 
2970 */ 2971 if (!is_ipv4 || !IPFW_NAT_LOADED) { 2972 retval = IP_FW_DENY; 2973 break; 2974 } 2975 2976 struct cfg_nat *t; 2977 int nat_id; 2978 2979 set_match(args, f_pos, chain); 2980 /* Check if this is 'global' nat rule */ 2981 if (cmd->arg1 == IP_FW_NAT44_GLOBAL) { 2982 retval = ipfw_nat_ptr(args, NULL, m); 2983 break; 2984 } 2985 t = ((ipfw_insn_nat *)cmd)->nat; 2986 if (t == NULL) { 2987 nat_id = TARG(cmd->arg1, nat); 2988 t = (*lookup_nat_ptr)(&chain->nat, nat_id); 2989 2990 if (t == NULL) { 2991 retval = IP_FW_DENY; 2992 break; 2993 } 2994 if (cmd->arg1 != IP_FW_TARG) 2995 ((ipfw_insn_nat *)cmd)->nat = t; 2996 } 2997 retval = ipfw_nat_ptr(args, t, m); 2998 break; 2999 3000 case O_REASS: { 3001 int ip_off; 3002 3003 IPFW_INC_RULE_COUNTER(f, pktlen); 3004 l = 0; /* in any case exit inner loop */ 3005 ip_off = ntohs(ip->ip_off); 3006 3007 /* if not fragmented, go to next rule */ 3008 if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) 3009 break; 3010 3011 args->m = m = ip_reass(m); 3012 3013 /* 3014 * do IP header checksum fixup. 3015 */ 3016 if (m == NULL) { /* fragment got swallowed */ 3017 retval = IP_FW_DENY; 3018 } else { /* good, packet complete */ 3019 int hlen; 3020 3021 ip = mtod(m, struct ip *); 3022 hlen = ip->ip_hl << 2; 3023 ip->ip_sum = 0; 3024 if (hlen == sizeof(struct ip)) 3025 ip->ip_sum = in_cksum_hdr(ip); 3026 else 3027 ip->ip_sum = in_cksum(m, hlen); 3028 retval = IP_FW_REASS; 3029 set_match(args, f_pos, chain); 3030 } 3031 done = 1; /* exit outer loop */ 3032 break; 3033 } 3034 case O_EXTERNAL_ACTION: 3035 l = 0; /* in any case exit inner loop */ 3036 retval = ipfw_run_eaction(chain, args, 3037 cmd, &done); 3038 /* 3039 * If both @retval and @done are zero, 3040 * consider this as rule matching and 3041 * update counters. 3042 */ 3043 if (retval == 0 && done == 0) { 3044 IPFW_INC_RULE_COUNTER(f, pktlen); 3045 /* 3046 * Reset the result of the last 3047 * dynamic state lookup. 
3048 * External action can change 3049 * @args content, and it may be 3050 * used for new state lookup later. 3051 */ 3052 dyn_dir = MATCH_UNKNOWN; 3053 } 3054 break; 3055 3056 default: 3057 panic("-- unknown opcode %d\n", cmd->opcode); 3058 } /* end of switch() on opcodes */ 3059 /* 3060 * if we get here with l=0, then match is irrelevant. 3061 */ 3062 3063 if (cmd->len & F_NOT) 3064 match = !match; 3065 3066 if (match) { 3067 if (cmd->len & F_OR) 3068 skip_or = 1; 3069 } else { 3070 if (!(cmd->len & F_OR)) /* not an OR block, */ 3071 break; /* try next rule */ 3072 } 3073 3074 } /* end of inner loop, scan opcodes */ 3075 #undef PULLUP_LEN 3076 3077 if (done) 3078 break; 3079 3080 /* next_rule:; */ /* try next rule */ 3081 3082 } /* end of outer for, scan rules */ 3083 3084 if (done) { 3085 struct ip_fw *rule = chain->map[f_pos]; 3086 /* Update statistics */ 3087 IPFW_INC_RULE_COUNTER(rule, pktlen); 3088 } else { 3089 retval = IP_FW_DENY; 3090 printf("ipfw: ouch!, skip past end of rules, denying packet\n"); 3091 } 3092 IPFW_PF_RUNLOCK(chain); 3093 #ifdef __FreeBSD__ 3094 if (ucred_cache != NULL) 3095 crfree(ucred_cache); 3096 #endif 3097 return (retval); 3098 3099 pullup_failed: 3100 if (V_fw_verbose) 3101 printf("ipfw: pullup failed\n"); 3102 return (IP_FW_DENY); 3103 } 3104 3105 /* 3106 * Set maximum number of tables that can be used in given VNET ipfw instance. 3107 */ 3108 #ifdef SYSCTL_NODE 3109 static int 3110 sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) 3111 { 3112 int error; 3113 unsigned int ntables; 3114 3115 ntables = V_fw_tables_max; 3116 3117 error = sysctl_handle_int(oidp, &ntables, 0, req); 3118 /* Read operation or some error */ 3119 if ((error != 0) || (req->newptr == NULL)) 3120 return (error); 3121 3122 return (ipfw_resize_tables(&V_layer3_chain, ntables)); 3123 } 3124 3125 /* 3126 * Switches table namespace between global and per-set. 
3127 */ 3128 static int 3129 sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS) 3130 { 3131 int error; 3132 unsigned int sets; 3133 3134 sets = V_fw_tables_sets; 3135 3136 error = sysctl_handle_int(oidp, &sets, 0, req); 3137 /* Read operation or some error */ 3138 if ((error != 0) || (req->newptr == NULL)) 3139 return (error); 3140 3141 return (ipfw_switch_tables_namespace(&V_layer3_chain, sets)); 3142 } 3143 #endif 3144 3145 /* 3146 * Module and VNET glue 3147 */ 3148 3149 /* 3150 * Stuff that must be initialised only on boot or module load 3151 */ 3152 static int 3153 ipfw_init(void) 3154 { 3155 int error = 0; 3156 3157 /* 3158 * Only print out this stuff the first time around, 3159 * when called from the sysinit code. 3160 */ 3161 printf("ipfw2 " 3162 #ifdef INET6 3163 "(+ipv6) " 3164 #endif 3165 "initialized, divert %s, nat %s, " 3166 "default to %s, logging ", 3167 #ifdef IPDIVERT 3168 "enabled", 3169 #else 3170 "loadable", 3171 #endif 3172 #ifdef IPFIREWALL_NAT 3173 "enabled", 3174 #else 3175 "loadable", 3176 #endif 3177 default_to_accept ? "accept" : "deny"); 3178 3179 /* 3180 * Note: V_xxx variables can be accessed here but the vnet specific 3181 * initializer may not have been called yet for the VIMAGE case. 3182 * Tuneables will have been processed. We will print out values for 3183 * the default vnet. 3184 * XXX This should all be rationalized AFTER 8.0 3185 */ 3186 if (V_fw_verbose == 0) 3187 printf("disabled\n"); 3188 else if (V_verbose_limit == 0) 3189 printf("unlimited\n"); 3190 else 3191 printf("limited to %d packets/entry by default\n", 3192 V_verbose_limit); 3193 3194 /* Check user-supplied table count for validness */ 3195 if (default_fw_tables > IPFW_TABLES_MAX) 3196 default_fw_tables = IPFW_TABLES_MAX; 3197 3198 ipfw_init_sopt_handler(); 3199 ipfw_init_obj_rewriter(); 3200 ipfw_iface_init(); 3201 return (error); 3202 } 3203 3204 /* 3205 * Called for the removal of the last instance only on module unload. 
3206 */ 3207 static void 3208 ipfw_destroy(void) 3209 { 3210 3211 ipfw_iface_destroy(); 3212 ipfw_destroy_sopt_handler(); 3213 ipfw_destroy_obj_rewriter(); 3214 printf("IP firewall unloaded\n"); 3215 } 3216 3217 /* 3218 * Stuff that must be initialized for every instance 3219 * (including the first of course). 3220 */ 3221 static int 3222 vnet_ipfw_init(const void *unused) 3223 { 3224 int error, first; 3225 struct ip_fw *rule = NULL; 3226 struct ip_fw_chain *chain; 3227 3228 chain = &V_layer3_chain; 3229 3230 first = IS_DEFAULT_VNET(curvnet) ? 1 : 0; 3231 3232 /* First set up some values that are compile time options */ 3233 V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ 3234 V_fw_deny_unknown_exthdrs = 1; 3235 #ifdef IPFIREWALL_VERBOSE 3236 V_fw_verbose = 1; 3237 #endif 3238 #ifdef IPFIREWALL_VERBOSE_LIMIT 3239 V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 3240 #endif 3241 #ifdef IPFIREWALL_NAT 3242 LIST_INIT(&chain->nat); 3243 #endif 3244 3245 /* Init shared services hash table */ 3246 ipfw_init_srv(chain); 3247 3248 ipfw_init_counters(); 3249 /* Set initial number of tables */ 3250 V_fw_tables_max = default_fw_tables; 3251 error = ipfw_init_tables(chain, first); 3252 if (error) { 3253 printf("ipfw2: setting up tables failed\n"); 3254 free(chain->map, M_IPFW); 3255 free(rule, M_IPFW); 3256 return (ENOSPC); 3257 } 3258 3259 IPFW_LOCK_INIT(chain); 3260 3261 /* fill and insert the default rule */ 3262 rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw)); 3263 rule->cmd_len = 1; 3264 rule->cmd[0].len = 1; 3265 rule->cmd[0].opcode = default_to_accept ? 
O_ACCEPT : O_DENY; 3266 chain->default_rule = rule; 3267 ipfw_add_protected_rule(chain, rule, 0); 3268 3269 ipfw_dyn_init(chain); 3270 ipfw_eaction_init(chain, first); 3271 #ifdef LINEAR_SKIPTO 3272 ipfw_init_skipto_cache(chain); 3273 #endif 3274 ipfw_bpf_init(first); 3275 3276 /* First set up some values that are compile time options */ 3277 V_ipfw_vnet_ready = 1; /* Open for business */ 3278 3279 /* 3280 * Hook the sockopt handler and pfil hooks for ipv4 and ipv6. 3281 * Even if the latter two fail we still keep the module alive 3282 * because the sockopt and layer2 paths are still useful. 3283 * ipfw[6]_hook return 0 on success, ENOENT on failure, 3284 * so we can ignore the exact return value and just set a flag. 3285 * 3286 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so 3287 * changes in the underlying (per-vnet) variables trigger 3288 * immediate hook()/unhook() calls. 3289 * In layer2 we have the same behaviour, except that V_ether_ipfw 3290 * is checked on each packet because there are no pfil hooks. 3291 */ 3292 V_ip_fw_ctl_ptr = ipfw_ctl3; 3293 error = ipfw_attach_hooks(1); 3294 return (error); 3295 } 3296 3297 /* 3298 * Called for the removal of each instance. 3299 */ 3300 static int 3301 vnet_ipfw_uninit(const void *unused) 3302 { 3303 struct ip_fw *reap; 3304 struct ip_fw_chain *chain = &V_layer3_chain; 3305 int i, last; 3306 3307 V_ipfw_vnet_ready = 0; /* tell new callers to go away */ 3308 /* 3309 * disconnect from ipv4, ipv6, layer2 and sockopt. 3310 * Then grab, release and grab again the WLOCK so we make 3311 * sure the update is propagated and nobody will be in. 3312 */ 3313 (void)ipfw_attach_hooks(0 /* detach */); 3314 V_ip_fw_ctl_ptr = NULL; 3315 3316 last = IS_DEFAULT_VNET(curvnet) ? 
1 : 0; 3317 3318 IPFW_UH_WLOCK(chain); 3319 IPFW_UH_WUNLOCK(chain); 3320 3321 ipfw_dyn_uninit(0); /* run the callout_drain */ 3322 3323 IPFW_UH_WLOCK(chain); 3324 3325 reap = NULL; 3326 IPFW_WLOCK(chain); 3327 for (i = 0; i < chain->n_rules; i++) 3328 ipfw_reap_add(chain, &reap, chain->map[i]); 3329 free(chain->map, M_IPFW); 3330 #ifdef LINEAR_SKIPTO 3331 ipfw_destroy_skipto_cache(chain); 3332 #endif 3333 IPFW_WUNLOCK(chain); 3334 IPFW_UH_WUNLOCK(chain); 3335 ipfw_destroy_tables(chain, last); 3336 ipfw_eaction_uninit(chain, last); 3337 if (reap != NULL) 3338 ipfw_reap_rules(reap); 3339 vnet_ipfw_iface_destroy(chain); 3340 ipfw_destroy_srv(chain); 3341 IPFW_LOCK_DESTROY(chain); 3342 ipfw_dyn_uninit(1); /* free the remaining parts */ 3343 ipfw_destroy_counters(); 3344 ipfw_bpf_uninit(last); 3345 return (0); 3346 } 3347 3348 /* 3349 * Module event handler. 3350 * In general we have the choice of handling most of these events by the 3351 * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to 3352 * use the SYSINIT handlers as they are more capable of expressing the 3353 * flow of control during module and vnet operations, so this is just 3354 * a skeleton. Note there is no SYSINIT equivalent of the module 3355 * SHUTDOWN handler, but we don't have anything to do in that case anyhow. 3356 */ 3357 static int 3358 ipfw_modevent(module_t mod, int type, void *unused) 3359 { 3360 int err = 0; 3361 3362 switch (type) { 3363 case MOD_LOAD: 3364 /* Called once at module load or 3365 * system boot if compiled in. */ 3366 break; 3367 case MOD_QUIESCE: 3368 /* Called before unload. May veto unloading. */ 3369 break; 3370 case MOD_UNLOAD: 3371 /* Called during unload. */ 3372 break; 3373 case MOD_SHUTDOWN: 3374 /* Called during system shutdown. */ 3375 break; 3376 default: 3377 err = EOPNOTSUPP; 3378 break; 3379 } 3380 return err; 3381 } 3382 3383 static moduledata_t ipfwmod = { 3384 "ipfw", 3385 ipfw_modevent, 3386 0 3387 }; 3388 3389 /* Define startup order. 
*/ 3390 #define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL 3391 #define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */ 3392 #define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */ 3393 #define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */ 3394 3395 DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER); 3396 FEATURE(ipfw_ctl3, "ipfw new sockopt calls"); 3397 MODULE_VERSION(ipfw, 3); 3398 /* should declare some dependencies here */ 3399 3400 /* 3401 * Starting up. Done in order after ipfwmod() has been called. 3402 * VNET_SYSINIT is also called for each existing vnet and each new vnet. 3403 */ 3404 SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, 3405 ipfw_init, NULL); 3406 VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER, 3407 vnet_ipfw_init, NULL); 3408 3409 /* 3410 * Closing up shop. These are done in REVERSE ORDER, but still 3411 * after ipfwmod() has been called. Not called on reboot. 3412 * VNET_SYSUNINIT is also called for each exiting vnet as it exits. 3413 * or when the module is unloaded. 3414 */ 3415 SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, 3416 ipfw_destroy, NULL); 3417 VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER, 3418 vnet_ipfw_uninit, NULL); 3419 /* end of file */ 3420