1 /*- 2 * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 
24 */ 25 26 #include <sys/cdefs.h> 27 __FBSDID("$FreeBSD$"); 28 29 /* 30 * The FreeBSD IP packet firewall, main file 31 */ 32 33 #include "opt_ipfw.h" 34 #include "opt_ipdivert.h" 35 #include "opt_inet.h" 36 #ifndef INET 37 #error "IPFIREWALL requires INET" 38 #endif /* INET */ 39 #include "opt_inet6.h" 40 #include "opt_ipsec.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/condvar.h> 45 #include <sys/counter.h> 46 #include <sys/eventhandler.h> 47 #include <sys/malloc.h> 48 #include <sys/mbuf.h> 49 #include <sys/kernel.h> 50 #include <sys/lock.h> 51 #include <sys/jail.h> 52 #include <sys/module.h> 53 #include <sys/priv.h> 54 #include <sys/proc.h> 55 #include <sys/rwlock.h> 56 #include <sys/rmlock.h> 57 #include <sys/socket.h> 58 #include <sys/socketvar.h> 59 #include <sys/sysctl.h> 60 #include <sys/syslog.h> 61 #include <sys/ucred.h> 62 #include <net/ethernet.h> /* for ETHERTYPE_IP */ 63 #include <net/if.h> 64 #include <net/if_var.h> 65 #include <net/route.h> 66 #include <net/pfil.h> 67 #include <net/vnet.h> 68 69 #include <netpfil/pf/pf_mtag.h> 70 71 #include <netinet/in.h> 72 #include <netinet/in_var.h> 73 #include <netinet/in_pcb.h> 74 #include <netinet/ip.h> 75 #include <netinet/ip_var.h> 76 #include <netinet/ip_icmp.h> 77 #include <netinet/ip_fw.h> 78 #include <netinet/ip_carp.h> 79 #include <netinet/pim.h> 80 #include <netinet/tcp_var.h> 81 #include <netinet/udp.h> 82 #include <netinet/udp_var.h> 83 #include <netinet/sctp.h> 84 85 #include <netinet/ip6.h> 86 #include <netinet/icmp6.h> 87 #ifdef INET6 88 #include <netinet6/in6_pcb.h> 89 #include <netinet6/scope6_var.h> 90 #include <netinet6/ip6_var.h> 91 #endif 92 93 #include <netpfil/ipfw/ip_fw_private.h> 94 95 #include <machine/in_cksum.h> /* XXX for in_cksum */ 96 97 #ifdef MAC 98 #include <security/mac/mac_framework.h> 99 #endif 100 101 /* 102 * static variables followed by global ones. 103 * All ipfw global variables are here. 
 */

/*
 * IPv6 policy knobs, exported below (INET6 only) as
 * net.inet6.ip6.fw.deny_unknown_exthdrs and
 * net.inet6.ip6.fw.permit_single_frag6.
 */
static VNET_DEFINE(int, fw_deny_unknown_exthdrs);
#define	V_fw_deny_unknown_exthdrs	VNET(fw_deny_unknown_exthdrs)

static VNET_DEFINE(int, fw_permit_single_frag6) = 1;
#define	V_fw_permit_single_frag6	VNET(fw_permit_single_frag6)

/*
 * Compile-time choice for the default rule; exported read-only
 * (CTLFLAG_RDTUN) as net.inet.ip.fw.default_to_accept.
 */
#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
static int default_to_accept = 1;
#else
static int default_to_accept;
#endif

/* Exported as net.inet.ip.fw.autoinc_step and net.inet.ip.fw.one_pass. */
VNET_DEFINE(int, autoinc_step);
VNET_DEFINE(int, fw_one_pass) = 1;

VNET_DEFINE(unsigned int, fw_tables_max);
VNET_DEFINE(unsigned int, fw_tables_sets) = 0;	/* Don't use set-aware tables */
/* Use 128 tables by default */
static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT;

/*
 * JUMP() resolves a skipto-style target to a rule position.  Which
 * helper backs it is selected at build time by LINEAR_SKIPTO; both
 * helpers are defined further down in this file.
 */
#ifndef LINEAR_SKIPTO
static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num,
    int tablearg, int jump_backwards);
#define	JUMP(ch, f, num, targ, back)	jump_fast(ch, f, num, targ, back)
#else
static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
    int tablearg, int jump_backwards);
#define	JUMP(ch, f, num, targ, back)	jump_linear(ch, f, num, targ, back)
#endif

/*
 * Each rule belongs to one of 32 different sets (0..31).
 * The variable set_disable contains one bit per set.
 * If the bit is set, all rules in the corresponding set
 * are disabled. Set RESVD_SET(31) is reserved for the default rule
 * and rules that are not deleted by the flush command,
 * and CANNOT be disabled.
 * Rules in set RESVD_SET can only be deleted individually.
 */
VNET_DEFINE(u_int32_t, set_disable);
#define	V_set_disable			VNET(set_disable)

VNET_DEFINE(int, fw_verbose);
/* counter for ipfw_log(NULL...) */
VNET_DEFINE(u_int64_t, norule_counter);
VNET_DEFINE(int, verbose_limit);

/* layer3_chain contains the list of rules for layer 3 */
VNET_DEFINE(struct ip_fw_chain, layer3_chain);

/* ipfw_vnet_ready controls when we are open for business */
VNET_DEFINE(int, ipfw_vnet_ready) = 0;

VNET_DEFINE(int, ipfw_nat_ready) = 0;

/*
 * NAT hook pointers.  They are only declared here; nothing in this part
 * of the file assigns them (presumably the NAT module fills them in --
 * verify against ip_fw_nat.c).
 */
ipfw_nat_t *ipfw_nat_ptr = NULL;
struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
ipfw_nat_cfg_t *ipfw_nat_del_ptr;
ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;

#ifdef SYSCTL_NODE
/* Backing storage for the read-only net.inet.ip.fw.default_rule sysctl. */
uint32_t dummy_def = IPFW_DEFAULT_RULE;
static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS);
static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS);

SYSBEGIN(f3)

/* net.inet.ip.fw.* : IPv4/common firewall knobs. */
SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
    "Only do a single pass through ipfw when using dummynet(4)");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0,
    "Rule number auto-increment step");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0,
    "Log matches to ipfw rules");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
    "Set upper limit of matches of ipfw rules logged");
SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
    &dummy_def, 0,
    "The default/max possible rule number.");
/* The two table knobs go through handler functions rather than raw ints. */
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_max,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU",
    "Maximum number of concurrently used tables");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets,
    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
    0, 0, sysctl_ipfw_tables_sets, "IU",
    "Use per-set namespace for tables");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN,
    &default_to_accept, 0,
    "Make the default rule accept all packets.");
/* Loader tunable seeding default_fw_tables (note the unsigned -> int cast). */
TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables);
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count,
    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
    "Number of static rules");

#ifdef INET6
/* net.inet6.ip6.fw.* : IPv6-only firewall knobs. */
SYSCTL_DECL(_net_inet6_ip6);
SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs,
    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
    &VNET_NAME(fw_deny_unknown_exthdrs), 0,
    "Deny packets with unknown IPv6 Extension Headers");
SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6,
    CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
    &VNET_NAME(fw_permit_single_frag6), 0,
    "Permit single packet IPv6 fragments");
#endif /* INET6 */

SYSEND

#endif /* SYSCTL_NODE */


/*
 * Some macros used in the various matching options.
226 * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T 227 * Other macros just cast void * into the appropriate type 228 */ 229 #define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) 230 #define TCP(p) ((struct tcphdr *)(p)) 231 #define SCTP(p) ((struct sctphdr *)(p)) 232 #define UDP(p) ((struct udphdr *)(p)) 233 #define ICMP(p) ((struct icmphdr *)(p)) 234 #define ICMP6(p) ((struct icmp6_hdr *)(p)) 235 236 static __inline int 237 icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) 238 { 239 int type = icmp->icmp_type; 240 241 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) ); 242 } 243 244 #define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \ 245 (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) ) 246 247 static int 248 is_icmp_query(struct icmphdr *icmp) 249 { 250 int type = icmp->icmp_type; 251 252 return (type <= ICMP_MAXTYPE && (TT & (1<<type)) ); 253 } 254 #undef TT 255 256 /* 257 * The following checks use two arrays of 8 or 16 bits to store the 258 * bits that we want set or clear, respectively. They are in the 259 * low and high half of cmd->arg1 or cmd->d[0]. 260 * 261 * We scan options and store the bits we find set. We succeed if 262 * 263 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 264 * 265 * The code is sometimes optimized not to store additional variables. 
266 */ 267 268 static int 269 flags_match(ipfw_insn *cmd, u_int8_t bits) 270 { 271 u_char want_clear; 272 bits = ~bits; 273 274 if ( ((cmd->arg1 & 0xff) & bits) != 0) 275 return 0; /* some bits we want set were clear */ 276 want_clear = (cmd->arg1 >> 8) & 0xff; 277 if ( (want_clear & bits) != want_clear) 278 return 0; /* some bits we want clear were set */ 279 return 1; 280 } 281 282 static int 283 ipopts_match(struct ip *ip, ipfw_insn *cmd) 284 { 285 int optlen, bits = 0; 286 u_char *cp = (u_char *)(ip + 1); 287 int x = (ip->ip_hl << 2) - sizeof (struct ip); 288 289 for (; x > 0; x -= optlen, cp += optlen) { 290 int opt = cp[IPOPT_OPTVAL]; 291 292 if (opt == IPOPT_EOL) 293 break; 294 if (opt == IPOPT_NOP) 295 optlen = 1; 296 else { 297 optlen = cp[IPOPT_OLEN]; 298 if (optlen <= 0 || optlen > x) 299 return 0; /* invalid or truncated */ 300 } 301 switch (opt) { 302 303 default: 304 break; 305 306 case IPOPT_LSRR: 307 bits |= IP_FW_IPOPT_LSRR; 308 break; 309 310 case IPOPT_SSRR: 311 bits |= IP_FW_IPOPT_SSRR; 312 break; 313 314 case IPOPT_RR: 315 bits |= IP_FW_IPOPT_RR; 316 break; 317 318 case IPOPT_TS: 319 bits |= IP_FW_IPOPT_TS; 320 break; 321 } 322 } 323 return (flags_match(cmd, bits)); 324 } 325 326 static int 327 tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) 328 { 329 int optlen, bits = 0; 330 u_char *cp = (u_char *)(tcp + 1); 331 int x = (tcp->th_off << 2) - sizeof(struct tcphdr); 332 333 for (; x > 0; x -= optlen, cp += optlen) { 334 int opt = cp[0]; 335 if (opt == TCPOPT_EOL) 336 break; 337 if (opt == TCPOPT_NOP) 338 optlen = 1; 339 else { 340 optlen = cp[1]; 341 if (optlen <= 0) 342 break; 343 } 344 345 switch (opt) { 346 347 default: 348 break; 349 350 case TCPOPT_MAXSEG: 351 bits |= IP_FW_TCPOPT_MSS; 352 break; 353 354 case TCPOPT_WINDOW: 355 bits |= IP_FW_TCPOPT_WINDOW; 356 break; 357 358 case TCPOPT_SACK_PERMITTED: 359 case TCPOPT_SACK: 360 bits |= IP_FW_TCPOPT_SACK; 361 break; 362 363 case TCPOPT_TIMESTAMP: 364 bits |= IP_FW_TCPOPT_TS; 365 break; 366 
367 } 368 } 369 return (flags_match(cmd, bits)); 370 } 371 372 static int 373 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, 374 uint32_t *tablearg) 375 { 376 377 if (ifp == NULL) /* no iface with this packet, match fails */ 378 return (0); 379 380 /* Check by name or by IP address */ 381 if (cmd->name[0] != '\0') { /* match by name */ 382 if (cmd->name[0] == '\1') /* use tablearg to match */ 383 return ipfw_lookup_table_extended(chain, cmd->p.kidx, 0, 384 &ifp->if_index, tablearg); 385 /* Check name */ 386 if (cmd->p.glob) { 387 if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) 388 return(1); 389 } else { 390 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) 391 return(1); 392 } 393 } else { 394 #if !defined(USERSPACE) && defined(__FreeBSD__) /* and OSX too ? */ 395 struct ifaddr *ia; 396 397 if_addr_rlock(ifp); 398 TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { 399 if (ia->ifa_addr->sa_family != AF_INET) 400 continue; 401 if (cmd->p.ip.s_addr == ((struct sockaddr_in *) 402 (ia->ifa_addr))->sin_addr.s_addr) { 403 if_addr_runlock(ifp); 404 return(1); /* match */ 405 } 406 } 407 if_addr_runlock(ifp); 408 #endif /* __FreeBSD__ */ 409 } 410 return(0); /* no match, fail ... */ 411 } 412 413 /* 414 * The verify_path function checks if a route to the src exists and 415 * if it is reachable via ifp (when provided). 416 * 417 * The 'verrevpath' option checks that the interface that an IP packet 418 * arrives on is the same interface that traffic destined for the 419 * packet's source address would be routed out of. 420 * The 'versrcreach' option just checks that the source address is 421 * reachable via any route (except default) in the routing table. 422 * These two are a measure to block forged packets. This is also 423 * commonly known as "anti-spoofing" or Unicast Reverse Path 424 * Forwarding (Unicast RFP) in Cisco-ese. 
The name of the knobs 425 * is purposely reminiscent of the Cisco IOS command, 426 * 427 * ip verify unicast reverse-path 428 * ip verify unicast source reachable-via any 429 * 430 * which implements the same functionality. But note that the syntax 431 * is misleading, and the check may be performed on all IP packets 432 * whether unicast, multicast, or broadcast. 433 */ 434 static int 435 verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) 436 { 437 #if defined(USERSPACE) || !defined(__FreeBSD__) 438 return 0; 439 #else 440 struct route ro; 441 struct sockaddr_in *dst; 442 443 bzero(&ro, sizeof(ro)); 444 445 dst = (struct sockaddr_in *)&(ro.ro_dst); 446 dst->sin_family = AF_INET; 447 dst->sin_len = sizeof(*dst); 448 dst->sin_addr = src; 449 in_rtalloc_ign(&ro, 0, fib); 450 451 if (ro.ro_rt == NULL) 452 return 0; 453 454 /* 455 * If ifp is provided, check for equality with rtentry. 456 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, 457 * in order to pass packets injected back by if_simloop(): 458 * routing entry (via lo0) for our own address 459 * may exist, so we need to handle routing assymetry. 460 */ 461 if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { 462 RTFREE(ro.ro_rt); 463 return 0; 464 } 465 466 /* if no ifp provided, check if rtentry is not default route */ 467 if (ifp == NULL && 468 satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) { 469 RTFREE(ro.ro_rt); 470 return 0; 471 } 472 473 /* or if this is a blackhole/reject route */ 474 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 475 RTFREE(ro.ro_rt); 476 return 0; 477 } 478 479 /* found valid route */ 480 RTFREE(ro.ro_rt); 481 return 1; 482 #endif /* __FreeBSD__ */ 483 } 484 485 #ifdef INET6 486 /* 487 * ipv6 specific rules here... 
488 */ 489 static __inline int 490 icmp6type_match (int type, ipfw_insn_u32 *cmd) 491 { 492 return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) ); 493 } 494 495 static int 496 flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) 497 { 498 int i; 499 for (i=0; i <= cmd->o.arg1; ++i ) 500 if (curr_flow == cmd->d[i] ) 501 return 1; 502 return 0; 503 } 504 505 /* support for IP6_*_ME opcodes */ 506 static const struct in6_addr lla_mask = {{{ 507 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 508 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 509 }}}; 510 511 static int 512 ipfw_localip6(struct in6_addr *in6) 513 { 514 struct rm_priotracker in6_ifa_tracker; 515 struct in6_ifaddr *ia; 516 517 if (IN6_IS_ADDR_MULTICAST(in6)) 518 return (0); 519 520 if (!IN6_IS_ADDR_LINKLOCAL(in6)) 521 return (in6_localip(in6)); 522 523 IN6_IFADDR_RLOCK(&in6_ifa_tracker); 524 TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 525 if (!IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) 526 continue; 527 if (IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr, 528 in6, &lla_mask)) { 529 IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); 530 return (1); 531 } 532 } 533 IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); 534 return (0); 535 } 536 537 static int 538 verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) 539 { 540 struct route_in6 ro; 541 struct sockaddr_in6 *dst; 542 543 bzero(&ro, sizeof(ro)); 544 545 dst = (struct sockaddr_in6 * )&(ro.ro_dst); 546 dst->sin6_family = AF_INET6; 547 dst->sin6_len = sizeof(*dst); 548 dst->sin6_addr = *src; 549 550 in6_rtalloc_ign(&ro, 0, fib); 551 if (ro.ro_rt == NULL) 552 return 0; 553 554 /* 555 * if ifp is provided, check for equality with rtentry 556 * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, 557 * to support the case of sending packets to an address of our own. 
558 * (where the former interface is the first argument of if_simloop() 559 * (=ifp), the latter is lo0) 560 */ 561 if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { 562 RTFREE(ro.ro_rt); 563 return 0; 564 } 565 566 /* if no ifp provided, check if rtentry is not default route */ 567 if (ifp == NULL && 568 IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) { 569 RTFREE(ro.ro_rt); 570 return 0; 571 } 572 573 /* or if this is a blackhole/reject route */ 574 if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 575 RTFREE(ro.ro_rt); 576 return 0; 577 } 578 579 /* found valid route */ 580 RTFREE(ro.ro_rt); 581 return 1; 582 583 } 584 585 static int 586 is_icmp6_query(int icmp6_type) 587 { 588 if ((icmp6_type <= ICMP6_MAXTYPE) && 589 (icmp6_type == ICMP6_ECHO_REQUEST || 590 icmp6_type == ICMP6_MEMBERSHIP_QUERY || 591 icmp6_type == ICMP6_WRUREQUEST || 592 icmp6_type == ICMP6_FQDN_QUERY || 593 icmp6_type == ICMP6_NI_QUERY)) 594 return (1); 595 596 return (0); 597 } 598 599 static void 600 send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) 601 { 602 struct mbuf *m; 603 604 m = args->m; 605 if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) { 606 struct tcphdr *tcp; 607 tcp = (struct tcphdr *)((char *)ip6 + hlen); 608 609 if ((tcp->th_flags & TH_RST) == 0) { 610 struct mbuf *m0; 611 m0 = ipfw_send_pkt(args->m, &(args->f_id), 612 ntohl(tcp->th_seq), ntohl(tcp->th_ack), 613 tcp->th_flags | TH_RST); 614 if (m0 != NULL) 615 ip6_output(m0, NULL, NULL, 0, NULL, NULL, 616 NULL); 617 } 618 FREE_PKT(m); 619 } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */ 620 #if 0 621 /* 622 * Unlike above, the mbufs need to line up with the ip6 hdr, 623 * as the contents are read. We need to m_adj() the 624 * needed amount. 625 * The mbuf will however be thrown away so we can adjust it. 626 * Remember we did an m_pullup on it already so we 627 * can make some assumptions about contiguousness. 
628 */ 629 if (args->L3offset) 630 m_adj(m, args->L3offset); 631 #endif 632 icmp6_error(m, ICMP6_DST_UNREACH, code, 0); 633 } else 634 FREE_PKT(m); 635 636 args->m = NULL; 637 } 638 639 #endif /* INET6 */ 640 641 642 /* 643 * sends a reject message, consuming the mbuf passed as an argument. 644 */ 645 static void 646 send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip) 647 { 648 649 #if 0 650 /* XXX When ip is not guaranteed to be at mtod() we will 651 * need to account for this */ 652 * The mbuf will however be thrown away so we can adjust it. 653 * Remember we did an m_pullup on it already so we 654 * can make some assumptions about contiguousness. 655 */ 656 if (args->L3offset) 657 m_adj(m, args->L3offset); 658 #endif 659 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 660 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 661 } else if (args->f_id.proto == IPPROTO_TCP) { 662 struct tcphdr *const tcp = 663 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 664 if ( (tcp->th_flags & TH_RST) == 0) { 665 struct mbuf *m; 666 m = ipfw_send_pkt(args->m, &(args->f_id), 667 ntohl(tcp->th_seq), ntohl(tcp->th_ack), 668 tcp->th_flags | TH_RST); 669 if (m != NULL) 670 ip_output(m, NULL, NULL, 0, NULL, NULL); 671 } 672 FREE_PKT(args->m); 673 } else 674 FREE_PKT(args->m); 675 args->m = NULL; 676 } 677 678 /* 679 * Support for uid/gid/jail lookup. These tests are expensive 680 * (because we may need to look into the list of active sockets) 681 * so we cache the results. ugid_lookupp is 0 if we have not 682 * yet done a lookup, 1 if we succeeded, and -1 if we tried 683 * and failed. The function always returns the match value. 684 * We could actually spare the variable and use *uc, setting 685 * it to '(void *)check_uidgid if we have no info, NULL if 686 * we tried and failed, or any other value if successful. 
687 */ 688 static int 689 check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp, 690 struct ucred **uc) 691 { 692 #if defined(USERSPACE) 693 return 0; // not supported in userspace 694 #else 695 #ifndef __FreeBSD__ 696 /* XXX */ 697 return cred_check(insn, proto, oif, 698 dst_ip, dst_port, src_ip, src_port, 699 (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb); 700 #else /* FreeBSD */ 701 struct in_addr src_ip, dst_ip; 702 struct inpcbinfo *pi; 703 struct ipfw_flow_id *id; 704 struct inpcb *pcb, *inp; 705 struct ifnet *oif; 706 int lookupflags; 707 int match; 708 709 id = &args->f_id; 710 inp = args->inp; 711 oif = args->oif; 712 713 /* 714 * Check to see if the UDP or TCP stack supplied us with 715 * the PCB. If so, rather then holding a lock and looking 716 * up the PCB, we can use the one that was supplied. 717 */ 718 if (inp && *ugid_lookupp == 0) { 719 INP_LOCK_ASSERT(inp); 720 if (inp->inp_socket != NULL) { 721 *uc = crhold(inp->inp_cred); 722 *ugid_lookupp = 1; 723 } else 724 *ugid_lookupp = -1; 725 } 726 /* 727 * If we have already been here and the packet has no 728 * PCB entry associated with it, then we can safely 729 * assume that this is a no match. 
730 */ 731 if (*ugid_lookupp == -1) 732 return (0); 733 if (id->proto == IPPROTO_TCP) { 734 lookupflags = 0; 735 pi = &V_tcbinfo; 736 } else if (id->proto == IPPROTO_UDP) { 737 lookupflags = INPLOOKUP_WILDCARD; 738 pi = &V_udbinfo; 739 } else 740 return 0; 741 lookupflags |= INPLOOKUP_RLOCKPCB; 742 match = 0; 743 if (*ugid_lookupp == 0) { 744 if (id->addr_type == 6) { 745 #ifdef INET6 746 if (oif == NULL) 747 pcb = in6_pcblookup_mbuf(pi, 748 &id->src_ip6, htons(id->src_port), 749 &id->dst_ip6, htons(id->dst_port), 750 lookupflags, oif, args->m); 751 else 752 pcb = in6_pcblookup_mbuf(pi, 753 &id->dst_ip6, htons(id->dst_port), 754 &id->src_ip6, htons(id->src_port), 755 lookupflags, oif, args->m); 756 #else 757 *ugid_lookupp = -1; 758 return (0); 759 #endif 760 } else { 761 src_ip.s_addr = htonl(id->src_ip); 762 dst_ip.s_addr = htonl(id->dst_ip); 763 if (oif == NULL) 764 pcb = in_pcblookup_mbuf(pi, 765 src_ip, htons(id->src_port), 766 dst_ip, htons(id->dst_port), 767 lookupflags, oif, args->m); 768 else 769 pcb = in_pcblookup_mbuf(pi, 770 dst_ip, htons(id->dst_port), 771 src_ip, htons(id->src_port), 772 lookupflags, oif, args->m); 773 } 774 if (pcb != NULL) { 775 INP_RLOCK_ASSERT(pcb); 776 *uc = crhold(pcb->inp_cred); 777 *ugid_lookupp = 1; 778 INP_RUNLOCK(pcb); 779 } 780 if (*ugid_lookupp == 0) { 781 /* 782 * We tried and failed, set the variable to -1 783 * so we will not try again on this packet. 784 */ 785 *ugid_lookupp = -1; 786 return (0); 787 } 788 } 789 if (insn->o.opcode == O_UID) 790 match = ((*uc)->cr_uid == (uid_t)insn->d[0]); 791 else if (insn->o.opcode == O_GID) 792 match = groupmember((gid_t)insn->d[0], *uc); 793 else if (insn->o.opcode == O_JAIL) 794 match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]); 795 return (match); 796 #endif /* __FreeBSD__ */ 797 #endif /* not supported in userspace */ 798 } 799 800 /* 801 * Helper function to set args with info on the rule after the matching 802 * one. 
slot is precise, whereas we guess rule_id as they are 803 * assigned sequentially. 804 */ 805 static inline void 806 set_match(struct ip_fw_args *args, int slot, 807 struct ip_fw_chain *chain) 808 { 809 args->rule.chain_id = chain->id; 810 args->rule.slot = slot + 1; /* we use 0 as a marker */ 811 args->rule.rule_id = 1 + chain->map[slot]->id; 812 args->rule.rulenum = chain->map[slot]->rulenum; 813 } 814 815 #ifndef LINEAR_SKIPTO 816 /* 817 * Helper function to enable cached rule lookups using 818 * cached_id and cached_pos fields in ipfw rule. 819 */ 820 static int 821 jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, 822 int tablearg, int jump_backwards) 823 { 824 int f_pos; 825 826 /* If possible use cached f_pos (in f->cached_pos), 827 * whose version is written in f->cached_id 828 * (horrible hacks to avoid changing the ABI). 829 */ 830 if (num != IP_FW_TARG && f->cached_id == chain->id) 831 f_pos = f->cached_pos; 832 else { 833 int i = IP_FW_ARG_TABLEARG(chain, num, skipto); 834 /* make sure we do not jump backward */ 835 if (jump_backwards == 0 && i <= f->rulenum) 836 i = f->rulenum + 1; 837 if (chain->idxmap != NULL) 838 f_pos = chain->idxmap[i]; 839 else 840 f_pos = ipfw_find_rule(chain, i, 0); 841 /* update the cache */ 842 if (num != IP_FW_TARG) { 843 f->cached_id = chain->id; 844 f->cached_pos = f_pos; 845 } 846 } 847 848 return (f_pos); 849 } 850 #else 851 /* 852 * Helper function to enable real fast rule lookups. 853 */ 854 static int 855 jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, 856 int tablearg, int jump_backwards) 857 { 858 int f_pos; 859 860 num = IP_FW_ARG_TABLEARG(chain, num, skipto); 861 /* make sure we do not jump backward */ 862 if (jump_backwards == 0 && num <= f->rulenum) 863 num = f->rulenum + 1; 864 f_pos = chain->idxmap[num]; 865 866 return (f_pos); 867 } 868 #endif 869 870 #define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f) 871 /* 872 * The main check routine for the firewall. 
873 * 874 * All arguments are in args so we can modify them and return them 875 * back to the caller. 876 * 877 * Parameters: 878 * 879 * args->m (in/out) The packet; we set to NULL when/if we nuke it. 880 * Starts with the IP header. 881 * args->eh (in) Mac header if present, NULL for layer3 packet. 882 * args->L3offset Number of bytes bypassed if we came from L2. 883 * e.g. often sizeof(eh) ** NOTYET ** 884 * args->oif Outgoing interface, NULL if packet is incoming. 885 * The incoming interface is in the mbuf. (in) 886 * args->divert_rule (in/out) 887 * Skip up to the first rule past this rule number; 888 * upon return, non-zero port number for divert or tee. 889 * 890 * args->rule Pointer to the last matching rule (in/out) 891 * args->next_hop Socket we are forwarding to (out). 892 * args->next_hop6 IPv6 next hop we are forwarding to (out). 893 * args->f_id Addresses grabbed from the packet (out) 894 * args->rule.info a cookie depending on rule action 895 * 896 * Return value: 897 * 898 * IP_FW_PASS the packet must be accepted 899 * IP_FW_DENY the packet must be dropped 900 * IP_FW_DIVERT divert packet, port in m_tag 901 * IP_FW_TEE tee packet, port in m_tag 902 * IP_FW_DUMMYNET to dummynet, pipe in args->cookie 903 * IP_FW_NETGRAPH into netgraph, cookie args->cookie 904 * args->rule contains the matching rule, 905 * args->rule.info has additional information. 906 * 907 */ 908 int 909 ipfw_chk(struct ip_fw_args *args) 910 { 911 912 /* 913 * Local variables holding state while processing a packet: 914 * 915 * IMPORTANT NOTE: to speed up the processing of rules, there 916 * are some assumption on the values of the variables, which 917 * are documented here. Should you change them, please check 918 * the implementation of the various instructions to make sure 919 * that they still work. 920 * 921 * args->eh The MAC header. It is non-null for a layer2 922 * packet, it is NULL for a layer-3 packet. 
923 * **notyet** 924 * args->L3offset Offset in the packet to the L3 (IP or equiv.) header. 925 * 926 * m | args->m Pointer to the mbuf, as received from the caller. 927 * It may change if ipfw_chk() does an m_pullup, or if it 928 * consumes the packet because it calls send_reject(). 929 * XXX This has to change, so that ipfw_chk() never modifies 930 * or consumes the buffer. 931 * ip is the beginning of the ip(4 or 6) header. 932 * Calculated by adding the L3offset to the start of data. 933 * (Until we start using L3offset, the packet is 934 * supposed to start with the ip header). 935 */ 936 struct mbuf *m = args->m; 937 struct ip *ip = mtod(m, struct ip *); 938 939 /* 940 * For rules which contain uid/gid or jail constraints, cache 941 * a copy of the users credentials after the pcb lookup has been 942 * executed. This will speed up the processing of rules with 943 * these types of constraints, as well as decrease contention 944 * on pcb related locks. 945 */ 946 #ifndef __FreeBSD__ 947 struct bsd_ucred ucred_cache; 948 #else 949 struct ucred *ucred_cache = NULL; 950 #endif 951 int ucred_lookup = 0; 952 953 /* 954 * oif | args->oif If NULL, ipfw_chk has been called on the 955 * inbound path (ether_input, ip_input). 956 * If non-NULL, ipfw_chk has been called on the outbound path 957 * (ether_output, ip_output). 958 */ 959 struct ifnet *oif = args->oif; 960 961 int f_pos = 0; /* index of current rule in the array */ 962 int retval = 0; 963 964 /* 965 * hlen The length of the IP header. 966 */ 967 u_int hlen = 0; /* hlen >0 means we have an IP pkt */ 968 969 /* 970 * offset The offset of a fragment. offset != 0 means that 971 * we have a fragment at this offset of an IPv4 packet. 972 * offset == 0 means that (if this is an IPv4 packet) 973 * this is the first or only fragment. 974 * For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header 975 * or there is a single packet fragement (fragement header added 976 * without needed). 
We will treat a single packet fragment as if 977 * there was no fragment header (or log/block depending on the 978 * V_fw_permit_single_frag6 sysctl setting). 979 */ 980 u_short offset = 0; 981 u_short ip6f_mf = 0; 982 983 /* 984 * Local copies of addresses. They are only valid if we have 985 * an IP packet. 986 * 987 * proto The protocol. Set to 0 for non-ip packets, 988 * or to the protocol read from the packet otherwise. 989 * proto != 0 means that we have an IPv4 packet. 990 * 991 * src_port, dst_port port numbers, in HOST format. Only 992 * valid for TCP and UDP packets. 993 * 994 * src_ip, dst_ip ip addresses, in NETWORK format. 995 * Only valid for IPv4 packets. 996 */ 997 uint8_t proto; 998 uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */ 999 struct in_addr src_ip, dst_ip; /* NOTE: network format */ 1000 uint16_t iplen=0; 1001 int pktlen; 1002 uint16_t etype = 0; /* Host order stored ether type */ 1003 1004 /* 1005 * dyn_dir = MATCH_UNKNOWN when rules unchecked, 1006 * MATCH_NONE when checked and not matched (q = NULL), 1007 * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) 1008 */ 1009 int dyn_dir = MATCH_UNKNOWN; 1010 ipfw_dyn_rule *q = NULL; 1011 struct ip_fw_chain *chain = &V_layer3_chain; 1012 1013 /* 1014 * We store in ulp a pointer to the upper layer protocol header. 1015 * In the ipv4 case this is easy to determine from the header, 1016 * but for ipv6 we might have some additional headers in the middle. 1017 * ulp is NULL if not found. 1018 */ 1019 void *ulp = NULL; /* upper layer protocol pointer. */ 1020 1021 /* XXX ipv6 variables */ 1022 int is_ipv6 = 0; 1023 uint8_t icmp6_type = 0; 1024 uint16_t ext_hd = 0; /* bits vector for extension header filtering */ 1025 /* end of ipv6 variables */ 1026 1027 int is_ipv4 = 0; 1028 1029 int done = 0; /* flag to exit the outer loop */ 1030 IPFW_RLOCK_TRACKER; 1031 1032 if (m->m_flags & M_SKIP_FIREWALL || (! 
V_ipfw_vnet_ready)) 1033 return (IP_FW_PASS); /* accept */ 1034 1035 dst_ip.s_addr = 0; /* make sure it is initialized */ 1036 src_ip.s_addr = 0; /* make sure it is initialized */ 1037 pktlen = m->m_pkthdr.len; 1038 args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */ 1039 proto = args->f_id.proto = 0; /* mark f_id invalid */ 1040 /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */ 1041 1042 /* 1043 * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, 1044 * then it sets p to point at the offset "len" in the mbuf. WARNING: the 1045 * pointer might become stale after other pullups (but we never use it 1046 * this way). 1047 */ 1048 #define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T)) 1049 #define PULLUP_LEN(_len, p, T) \ 1050 do { \ 1051 int x = (_len) + T; \ 1052 if ((m)->m_len < x) { \ 1053 args->m = m = m_pullup(m, x); \ 1054 if (m == NULL) \ 1055 goto pullup_failed; \ 1056 } \ 1057 p = (mtod(m, char *) + (_len)); \ 1058 } while (0) 1059 1060 /* 1061 * if we have an ether header, 1062 */ 1063 if (args->eh) 1064 etype = ntohs(args->eh->ether_type); 1065 1066 /* Identify IP packets and fill up variables. 
*/ 1067 if (pktlen >= sizeof(struct ip6_hdr) && 1068 (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) { 1069 struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; 1070 is_ipv6 = 1; 1071 args->f_id.addr_type = 6; 1072 hlen = sizeof(struct ip6_hdr); 1073 proto = ip6->ip6_nxt; 1074 1075 /* Search extension headers to find upper layer protocols */ 1076 while (ulp == NULL && offset == 0) { 1077 switch (proto) { 1078 case IPPROTO_ICMPV6: 1079 PULLUP_TO(hlen, ulp, struct icmp6_hdr); 1080 icmp6_type = ICMP6(ulp)->icmp6_type; 1081 break; 1082 1083 case IPPROTO_TCP: 1084 PULLUP_TO(hlen, ulp, struct tcphdr); 1085 dst_port = TCP(ulp)->th_dport; 1086 src_port = TCP(ulp)->th_sport; 1087 /* save flags for dynamic rules */ 1088 args->f_id._flags = TCP(ulp)->th_flags; 1089 break; 1090 1091 case IPPROTO_SCTP: 1092 PULLUP_TO(hlen, ulp, struct sctphdr); 1093 src_port = SCTP(ulp)->src_port; 1094 dst_port = SCTP(ulp)->dest_port; 1095 break; 1096 1097 case IPPROTO_UDP: 1098 PULLUP_TO(hlen, ulp, struct udphdr); 1099 dst_port = UDP(ulp)->uh_dport; 1100 src_port = UDP(ulp)->uh_sport; 1101 break; 1102 1103 case IPPROTO_HOPOPTS: /* RFC 2460 */ 1104 PULLUP_TO(hlen, ulp, struct ip6_hbh); 1105 ext_hd |= EXT_HOPOPTS; 1106 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; 1107 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; 1108 ulp = NULL; 1109 break; 1110 1111 case IPPROTO_ROUTING: /* RFC 2460 */ 1112 PULLUP_TO(hlen, ulp, struct ip6_rthdr); 1113 switch (((struct ip6_rthdr *)ulp)->ip6r_type) { 1114 case 0: 1115 ext_hd |= EXT_RTHDR0; 1116 break; 1117 case 2: 1118 ext_hd |= EXT_RTHDR2; 1119 break; 1120 default: 1121 if (V_fw_verbose) 1122 printf("IPFW2: IPV6 - Unknown " 1123 "Routing Header type(%d)\n", 1124 ((struct ip6_rthdr *) 1125 ulp)->ip6r_type); 1126 if (V_fw_deny_unknown_exthdrs) 1127 return (IP_FW_DENY); 1128 break; 1129 } 1130 ext_hd |= EXT_ROUTING; 1131 hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3; 1132 proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt; 1133 ulp = NULL; 1134 
break; 1135 1136 case IPPROTO_FRAGMENT: /* RFC 2460 */ 1137 PULLUP_TO(hlen, ulp, struct ip6_frag); 1138 ext_hd |= EXT_FRAGMENT; 1139 hlen += sizeof (struct ip6_frag); 1140 proto = ((struct ip6_frag *)ulp)->ip6f_nxt; 1141 offset = ((struct ip6_frag *)ulp)->ip6f_offlg & 1142 IP6F_OFF_MASK; 1143 ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg & 1144 IP6F_MORE_FRAG; 1145 if (V_fw_permit_single_frag6 == 0 && 1146 offset == 0 && ip6f_mf == 0) { 1147 if (V_fw_verbose) 1148 printf("IPFW2: IPV6 - Invalid " 1149 "Fragment Header\n"); 1150 if (V_fw_deny_unknown_exthdrs) 1151 return (IP_FW_DENY); 1152 break; 1153 } 1154 args->f_id.extra = 1155 ntohl(((struct ip6_frag *)ulp)->ip6f_ident); 1156 ulp = NULL; 1157 break; 1158 1159 case IPPROTO_DSTOPTS: /* RFC 2460 */ 1160 PULLUP_TO(hlen, ulp, struct ip6_hbh); 1161 ext_hd |= EXT_DSTOPTS; 1162 hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; 1163 proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; 1164 ulp = NULL; 1165 break; 1166 1167 case IPPROTO_AH: /* RFC 2402 */ 1168 PULLUP_TO(hlen, ulp, struct ip6_ext); 1169 ext_hd |= EXT_AH; 1170 hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2; 1171 proto = ((struct ip6_ext *)ulp)->ip6e_nxt; 1172 ulp = NULL; 1173 break; 1174 1175 case IPPROTO_ESP: /* RFC 2406 */ 1176 PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */ 1177 /* Anything past Seq# is variable length and 1178 * data past this ext. header is encrypted. */ 1179 ext_hd |= EXT_ESP; 1180 break; 1181 1182 case IPPROTO_NONE: /* RFC 2460 */ 1183 /* 1184 * Packet ends here, and IPv6 header has 1185 * already been pulled up. If ip6e_len!=0 1186 * then octets must be ignored. 1187 */ 1188 ulp = ip; /* non-NULL to get out of loop. */ 1189 break; 1190 1191 case IPPROTO_OSPFIGP: 1192 /* XXX OSPF header check? */ 1193 PULLUP_TO(hlen, ulp, struct ip6_ext); 1194 break; 1195 1196 case IPPROTO_PIM: 1197 /* XXX PIM header check? 
*/ 1198 PULLUP_TO(hlen, ulp, struct pim); 1199 break; 1200 1201 case IPPROTO_CARP: 1202 PULLUP_TO(hlen, ulp, struct carp_header); 1203 if (((struct carp_header *)ulp)->carp_version != 1204 CARP_VERSION) 1205 return (IP_FW_DENY); 1206 if (((struct carp_header *)ulp)->carp_type != 1207 CARP_ADVERTISEMENT) 1208 return (IP_FW_DENY); 1209 break; 1210 1211 case IPPROTO_IPV6: /* RFC 2893 */ 1212 PULLUP_TO(hlen, ulp, struct ip6_hdr); 1213 break; 1214 1215 case IPPROTO_IPV4: /* RFC 2893 */ 1216 PULLUP_TO(hlen, ulp, struct ip); 1217 break; 1218 1219 default: 1220 if (V_fw_verbose) 1221 printf("IPFW2: IPV6 - Unknown " 1222 "Extension Header(%d), ext_hd=%x\n", 1223 proto, ext_hd); 1224 if (V_fw_deny_unknown_exthdrs) 1225 return (IP_FW_DENY); 1226 PULLUP_TO(hlen, ulp, struct ip6_ext); 1227 break; 1228 } /*switch */ 1229 } 1230 ip = mtod(m, struct ip *); 1231 ip6 = (struct ip6_hdr *)ip; 1232 args->f_id.src_ip6 = ip6->ip6_src; 1233 args->f_id.dst_ip6 = ip6->ip6_dst; 1234 args->f_id.src_ip = 0; 1235 args->f_id.dst_ip = 0; 1236 args->f_id.flow_id6 = ntohl(ip6->ip6_flow); 1237 } else if (pktlen >= sizeof(struct ip) && 1238 (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) { 1239 is_ipv4 = 1; 1240 hlen = ip->ip_hl << 2; 1241 args->f_id.addr_type = 4; 1242 1243 /* 1244 * Collect parameters into local variables for faster matching. 1245 */ 1246 proto = ip->ip_p; 1247 src_ip = ip->ip_src; 1248 dst_ip = ip->ip_dst; 1249 offset = ntohs(ip->ip_off) & IP_OFFMASK; 1250 iplen = ntohs(ip->ip_len); 1251 pktlen = iplen < pktlen ? 
iplen : pktlen; 1252 1253 if (offset == 0) { 1254 switch (proto) { 1255 case IPPROTO_TCP: 1256 PULLUP_TO(hlen, ulp, struct tcphdr); 1257 dst_port = TCP(ulp)->th_dport; 1258 src_port = TCP(ulp)->th_sport; 1259 /* save flags for dynamic rules */ 1260 args->f_id._flags = TCP(ulp)->th_flags; 1261 break; 1262 1263 case IPPROTO_SCTP: 1264 PULLUP_TO(hlen, ulp, struct sctphdr); 1265 src_port = SCTP(ulp)->src_port; 1266 dst_port = SCTP(ulp)->dest_port; 1267 break; 1268 1269 case IPPROTO_UDP: 1270 PULLUP_TO(hlen, ulp, struct udphdr); 1271 dst_port = UDP(ulp)->uh_dport; 1272 src_port = UDP(ulp)->uh_sport; 1273 break; 1274 1275 case IPPROTO_ICMP: 1276 PULLUP_TO(hlen, ulp, struct icmphdr); 1277 //args->f_id.flags = ICMP(ulp)->icmp_type; 1278 break; 1279 1280 default: 1281 break; 1282 } 1283 } 1284 1285 ip = mtod(m, struct ip *); 1286 args->f_id.src_ip = ntohl(src_ip.s_addr); 1287 args->f_id.dst_ip = ntohl(dst_ip.s_addr); 1288 } 1289 #undef PULLUP_TO 1290 if (proto) { /* we may have port numbers, store them */ 1291 args->f_id.proto = proto; 1292 args->f_id.src_port = src_port = ntohs(src_port); 1293 args->f_id.dst_port = dst_port = ntohs(dst_port); 1294 } 1295 1296 IPFW_PF_RLOCK(chain); 1297 if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ 1298 IPFW_PF_RUNLOCK(chain); 1299 return (IP_FW_PASS); /* accept */ 1300 } 1301 if (args->rule.slot) { 1302 /* 1303 * Packet has already been tagged as a result of a previous 1304 * match on rule args->rule aka args->rule_id (PIPE, QUEUE, 1305 * REASS, NETGRAPH, DIVERT/TEE...) 1306 * Validate the slot and continue from the next one 1307 * if still present, otherwise do a lookup. 1308 */ 1309 f_pos = (args->rule.chain_id == chain->id) ? 1310 args->rule.slot : 1311 ipfw_find_rule(chain, args->rule.rulenum, 1312 args->rule.rule_id); 1313 } else { 1314 f_pos = 0; 1315 } 1316 1317 /* 1318 * Now scan the rules, and parse microinstructions for each rule. 1319 * We have two nested loops and an inner switch. 
Sometimes we 1320 * need to break out of one or both loops, or re-enter one of 1321 * the loops with updated variables. Loop variables are: 1322 * 1323 * f_pos (outer loop) points to the current rule. 1324 * On output it points to the matching rule. 1325 * done (outer loop) is used as a flag to break the loop. 1326 * l (inner loop) residual length of current rule. 1327 * cmd points to the current microinstruction. 1328 * 1329 * We break the inner loop by setting l=0 and possibly 1330 * cmdlen=0 if we don't want to advance cmd. 1331 * We break the outer loop by setting done=1 1332 * We can restart the inner loop by setting l>0 and f_pos, f, cmd 1333 * as needed. 1334 */ 1335 for (; f_pos < chain->n_rules; f_pos++) { 1336 ipfw_insn *cmd; 1337 uint32_t tablearg = 0; 1338 int l, cmdlen, skip_or; /* skip rest of OR block */ 1339 struct ip_fw *f; 1340 1341 f = chain->map[f_pos]; 1342 if (V_set_disable & (1 << f->set) ) 1343 continue; 1344 1345 skip_or = 0; 1346 for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; 1347 l -= cmdlen, cmd += cmdlen) { 1348 int match; 1349 1350 /* 1351 * check_body is a jump target used when we find a 1352 * CHECK_STATE, and need to jump to the body of 1353 * the target rule. 1354 */ 1355 1356 /* check_body: */ 1357 cmdlen = F_LEN(cmd); 1358 /* 1359 * An OR block (insn_1 || .. || insn_n) has the 1360 * F_OR bit set in all but the last instruction. 1361 * The first match will set "skip_or", and cause 1362 * the following instructions to be skipped until 1363 * past the one with the F_OR bit clear. 1364 */ 1365 if (skip_or) { /* skip this instruction */ 1366 if ((cmd->len & F_OR) == 0) 1367 skip_or = 0; /* next one is good */ 1368 continue; 1369 } 1370 match = 0; /* set to 1 if we succeed */ 1371 1372 switch (cmd->opcode) { 1373 /* 1374 * The first set of opcodes compares the packet's 1375 * fields with some pattern, setting 'match' if a 1376 * match is found. 
At the end of the loop there is 1377 * logic to deal with F_NOT and F_OR flags associated 1378 * with the opcode. 1379 */ 1380 case O_NOP: 1381 match = 1; 1382 break; 1383 1384 case O_FORWARD_MAC: 1385 printf("ipfw: opcode %d unimplemented\n", 1386 cmd->opcode); 1387 break; 1388 1389 case O_GID: 1390 case O_UID: 1391 case O_JAIL: 1392 /* 1393 * We only check offset == 0 && proto != 0, 1394 * as this ensures that we have a 1395 * packet with the ports info. 1396 */ 1397 if (offset != 0) 1398 break; 1399 if (proto == IPPROTO_TCP || 1400 proto == IPPROTO_UDP) 1401 match = check_uidgid( 1402 (ipfw_insn_u32 *)cmd, 1403 args, &ucred_lookup, 1404 #ifdef __FreeBSD__ 1405 &ucred_cache); 1406 #else 1407 (void *)&ucred_cache); 1408 #endif 1409 break; 1410 1411 case O_RECV: 1412 match = iface_match(m->m_pkthdr.rcvif, 1413 (ipfw_insn_if *)cmd, chain, &tablearg); 1414 break; 1415 1416 case O_XMIT: 1417 match = iface_match(oif, (ipfw_insn_if *)cmd, 1418 chain, &tablearg); 1419 break; 1420 1421 case O_VIA: 1422 match = iface_match(oif ? 
oif : 1423 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd, 1424 chain, &tablearg); 1425 break; 1426 1427 case O_MACADDR2: 1428 if (args->eh != NULL) { /* have MAC header */ 1429 u_int32_t *want = (u_int32_t *) 1430 ((ipfw_insn_mac *)cmd)->addr; 1431 u_int32_t *mask = (u_int32_t *) 1432 ((ipfw_insn_mac *)cmd)->mask; 1433 u_int32_t *hdr = (u_int32_t *)args->eh; 1434 1435 match = 1436 ( want[0] == (hdr[0] & mask[0]) && 1437 want[1] == (hdr[1] & mask[1]) && 1438 want[2] == (hdr[2] & mask[2]) ); 1439 } 1440 break; 1441 1442 case O_MAC_TYPE: 1443 if (args->eh != NULL) { 1444 u_int16_t *p = 1445 ((ipfw_insn_u16 *)cmd)->ports; 1446 int i; 1447 1448 for (i = cmdlen - 1; !match && i>0; 1449 i--, p += 2) 1450 match = (etype >= p[0] && 1451 etype <= p[1]); 1452 } 1453 break; 1454 1455 case O_FRAG: 1456 match = (offset != 0); 1457 break; 1458 1459 case O_IN: /* "out" is "not in" */ 1460 match = (oif == NULL); 1461 break; 1462 1463 case O_LAYER2: 1464 match = (args->eh != NULL); 1465 break; 1466 1467 case O_DIVERTED: 1468 { 1469 /* For diverted packets, args->rule.info 1470 * contains the divert port (in host format) 1471 * reason and direction. 1472 */ 1473 uint32_t i = args->rule.info; 1474 match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT && 1475 cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2); 1476 } 1477 break; 1478 1479 case O_PROTO: 1480 /* 1481 * We do not allow an arg of 0 so the 1482 * check of "proto" only suffices. 1483 */ 1484 match = (proto == cmd->arg1); 1485 break; 1486 1487 case O_IP_SRC: 1488 match = is_ipv4 && 1489 (((ipfw_insn_ip *)cmd)->addr.s_addr == 1490 src_ip.s_addr); 1491 break; 1492 1493 case O_IP_SRC_LOOKUP: 1494 case O_IP_DST_LOOKUP: 1495 if (is_ipv4) { 1496 uint32_t key = 1497 (cmd->opcode == O_IP_DST_LOOKUP) ? 1498 dst_ip.s_addr : src_ip.s_addr; 1499 uint32_t v = 0; 1500 1501 if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { 1502 /* generic lookup. The key must be 1503 * in 32bit big-endian format. 
1504 */ 1505 v = ((ipfw_insn_u32 *)cmd)->d[1]; 1506 if (v == 0) 1507 key = dst_ip.s_addr; 1508 else if (v == 1) 1509 key = src_ip.s_addr; 1510 else if (v == 6) /* dscp */ 1511 key = (ip->ip_tos >> 2) & 0x3f; 1512 else if (offset != 0) 1513 break; 1514 else if (proto != IPPROTO_TCP && 1515 proto != IPPROTO_UDP) 1516 break; 1517 else if (v == 2) 1518 key = dst_port; 1519 else if (v == 3) 1520 key = src_port; 1521 #ifndef USERSPACE 1522 else if (v == 4 || v == 5) { 1523 check_uidgid( 1524 (ipfw_insn_u32 *)cmd, 1525 args, &ucred_lookup, 1526 #ifdef __FreeBSD__ 1527 &ucred_cache); 1528 if (v == 4 /* O_UID */) 1529 key = ucred_cache->cr_uid; 1530 else if (v == 5 /* O_JAIL */) 1531 key = ucred_cache->cr_prison->pr_id; 1532 #else /* !__FreeBSD__ */ 1533 (void *)&ucred_cache); 1534 if (v ==4 /* O_UID */) 1535 key = ucred_cache.uid; 1536 else if (v == 5 /* O_JAIL */) 1537 key = ucred_cache.xid; 1538 #endif /* !__FreeBSD__ */ 1539 } 1540 #endif /* !USERSPACE */ 1541 else 1542 break; 1543 } 1544 match = ipfw_lookup_table(chain, 1545 cmd->arg1, key, &v); 1546 if (!match) 1547 break; 1548 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) 1549 match = 1550 ((ipfw_insn_u32 *)cmd)->d[0] == v; 1551 else 1552 tablearg = v; 1553 } else if (is_ipv6) { 1554 uint32_t v = 0; 1555 void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ? 
1556 &args->f_id.dst_ip6: &args->f_id.src_ip6; 1557 match = ipfw_lookup_table_extended(chain, 1558 cmd->arg1, 1559 sizeof(struct in6_addr), 1560 pkey, &v); 1561 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) 1562 match = ((ipfw_insn_u32 *)cmd)->d[0] == v; 1563 if (match) 1564 tablearg = v; 1565 } 1566 break; 1567 1568 case O_IP_FLOW_LOOKUP: 1569 { 1570 uint32_t v = 0; 1571 match = ipfw_lookup_table_extended(chain, 1572 cmd->arg1, 0, &args->f_id, &v); 1573 if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) 1574 match = ((ipfw_insn_u32 *)cmd)->d[0] == v; 1575 if (match) 1576 tablearg = v; 1577 } 1578 break; 1579 case O_IP_SRC_MASK: 1580 case O_IP_DST_MASK: 1581 if (is_ipv4) { 1582 uint32_t a = 1583 (cmd->opcode == O_IP_DST_MASK) ? 1584 dst_ip.s_addr : src_ip.s_addr; 1585 uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; 1586 int i = cmdlen-1; 1587 1588 for (; !match && i>0; i-= 2, p+= 2) 1589 match = (p[0] == (a & p[1])); 1590 } 1591 break; 1592 1593 case O_IP_SRC_ME: 1594 if (is_ipv4) { 1595 struct ifnet *tif; 1596 1597 INADDR_TO_IFP(src_ip, tif); 1598 match = (tif != NULL); 1599 break; 1600 } 1601 #ifdef INET6 1602 /* FALLTHROUGH */ 1603 case O_IP6_SRC_ME: 1604 match= is_ipv6 && ipfw_localip6(&args->f_id.src_ip6); 1605 #endif 1606 break; 1607 1608 case O_IP_DST_SET: 1609 case O_IP_SRC_SET: 1610 if (is_ipv4) { 1611 u_int32_t *d = (u_int32_t *)(cmd+1); 1612 u_int32_t addr = 1613 cmd->opcode == O_IP_DST_SET ? 
1614 args->f_id.dst_ip : 1615 args->f_id.src_ip; 1616 1617 if (addr < d[0]) 1618 break; 1619 addr -= d[0]; /* subtract base */ 1620 match = (addr < cmd->arg1) && 1621 ( d[ 1 + (addr>>5)] & 1622 (1<<(addr & 0x1f)) ); 1623 } 1624 break; 1625 1626 case O_IP_DST: 1627 match = is_ipv4 && 1628 (((ipfw_insn_ip *)cmd)->addr.s_addr == 1629 dst_ip.s_addr); 1630 break; 1631 1632 case O_IP_DST_ME: 1633 if (is_ipv4) { 1634 struct ifnet *tif; 1635 1636 INADDR_TO_IFP(dst_ip, tif); 1637 match = (tif != NULL); 1638 break; 1639 } 1640 #ifdef INET6 1641 /* FALLTHROUGH */ 1642 case O_IP6_DST_ME: 1643 match= is_ipv6 && ipfw_localip6(&args->f_id.dst_ip6); 1644 #endif 1645 break; 1646 1647 1648 case O_IP_SRCPORT: 1649 case O_IP_DSTPORT: 1650 /* 1651 * offset == 0 && proto != 0 is enough 1652 * to guarantee that we have a 1653 * packet with port info. 1654 */ 1655 if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) 1656 && offset == 0) { 1657 u_int16_t x = 1658 (cmd->opcode == O_IP_SRCPORT) ? 1659 src_port : dst_port ; 1660 u_int16_t *p = 1661 ((ipfw_insn_u16 *)cmd)->ports; 1662 int i; 1663 1664 for (i = cmdlen - 1; !match && i>0; 1665 i--, p += 2) 1666 match = (x>=p[0] && x<=p[1]); 1667 } 1668 break; 1669 1670 case O_ICMPTYPE: 1671 match = (offset == 0 && proto==IPPROTO_ICMP && 1672 icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); 1673 break; 1674 1675 #ifdef INET6 1676 case O_ICMP6TYPE: 1677 match = is_ipv6 && offset == 0 && 1678 proto==IPPROTO_ICMPV6 && 1679 icmp6type_match( 1680 ICMP6(ulp)->icmp6_type, 1681 (ipfw_insn_u32 *)cmd); 1682 break; 1683 #endif /* INET6 */ 1684 1685 case O_IPOPT: 1686 match = (is_ipv4 && 1687 ipopts_match(ip, cmd) ); 1688 break; 1689 1690 case O_IPVER: 1691 match = (is_ipv4 && 1692 cmd->arg1 == ip->ip_v); 1693 break; 1694 1695 case O_IPID: 1696 case O_IPLEN: 1697 case O_IPTTL: 1698 if (is_ipv4) { /* only for IP packets */ 1699 uint16_t x; 1700 uint16_t *p; 1701 int i; 1702 1703 if (cmd->opcode == O_IPLEN) 1704 x = iplen; 1705 else if (cmd->opcode == O_IPTTL) 
1706 x = ip->ip_ttl; 1707 else /* must be IPID */ 1708 x = ntohs(ip->ip_id); 1709 if (cmdlen == 1) { 1710 match = (cmd->arg1 == x); 1711 break; 1712 } 1713 /* otherwise we have ranges */ 1714 p = ((ipfw_insn_u16 *)cmd)->ports; 1715 i = cmdlen - 1; 1716 for (; !match && i>0; i--, p += 2) 1717 match = (x >= p[0] && x <= p[1]); 1718 } 1719 break; 1720 1721 case O_IPPRECEDENCE: 1722 match = (is_ipv4 && 1723 (cmd->arg1 == (ip->ip_tos & 0xe0)) ); 1724 break; 1725 1726 case O_IPTOS: 1727 match = (is_ipv4 && 1728 flags_match(cmd, ip->ip_tos)); 1729 break; 1730 1731 case O_DSCP: 1732 { 1733 uint32_t *p; 1734 uint16_t x; 1735 1736 p = ((ipfw_insn_u32 *)cmd)->d; 1737 1738 if (is_ipv4) 1739 x = ip->ip_tos >> 2; 1740 else if (is_ipv6) { 1741 uint8_t *v; 1742 v = &((struct ip6_hdr *)ip)->ip6_vfc; 1743 x = (*v & 0x0F) << 2; 1744 v++; 1745 x |= *v >> 6; 1746 } else 1747 break; 1748 1749 /* DSCP bitmask is stored as low_u32 high_u32 */ 1750 if (x > 32) 1751 match = *(p + 1) & (1 << (x - 32)); 1752 else 1753 match = *p & (1 << x); 1754 } 1755 break; 1756 1757 case O_TCPDATALEN: 1758 if (proto == IPPROTO_TCP && offset == 0) { 1759 struct tcphdr *tcp; 1760 uint16_t x; 1761 uint16_t *p; 1762 int i; 1763 1764 tcp = TCP(ulp); 1765 x = iplen - 1766 ((ip->ip_hl + tcp->th_off) << 2); 1767 if (cmdlen == 1) { 1768 match = (cmd->arg1 == x); 1769 break; 1770 } 1771 /* otherwise we have ranges */ 1772 p = ((ipfw_insn_u16 *)cmd)->ports; 1773 i = cmdlen - 1; 1774 for (; !match && i>0; i--, p += 2) 1775 match = (x >= p[0] && x <= p[1]); 1776 } 1777 break; 1778 1779 case O_TCPFLAGS: 1780 match = (proto == IPPROTO_TCP && offset == 0 && 1781 flags_match(cmd, TCP(ulp)->th_flags)); 1782 break; 1783 1784 case O_TCPOPTS: 1785 if (proto == IPPROTO_TCP && offset == 0 && ulp){ 1786 PULLUP_LEN(hlen, ulp, 1787 (TCP(ulp)->th_off << 2)); 1788 match = tcpopts_match(TCP(ulp), cmd); 1789 } 1790 break; 1791 1792 case O_TCPSEQ: 1793 match = (proto == IPPROTO_TCP && offset == 0 && 1794 ((ipfw_insn_u32 *)cmd)->d[0] == 
1795 TCP(ulp)->th_seq); 1796 break; 1797 1798 case O_TCPACK: 1799 match = (proto == IPPROTO_TCP && offset == 0 && 1800 ((ipfw_insn_u32 *)cmd)->d[0] == 1801 TCP(ulp)->th_ack); 1802 break; 1803 1804 case O_TCPWIN: 1805 if (proto == IPPROTO_TCP && offset == 0) { 1806 uint16_t x; 1807 uint16_t *p; 1808 int i; 1809 1810 x = ntohs(TCP(ulp)->th_win); 1811 if (cmdlen == 1) { 1812 match = (cmd->arg1 == x); 1813 break; 1814 } 1815 /* Otherwise we have ranges. */ 1816 p = ((ipfw_insn_u16 *)cmd)->ports; 1817 i = cmdlen - 1; 1818 for (; !match && i > 0; i--, p += 2) 1819 match = (x >= p[0] && x <= p[1]); 1820 } 1821 break; 1822 1823 case O_ESTAB: 1824 /* reject packets which have SYN only */ 1825 /* XXX should i also check for TH_ACK ? */ 1826 match = (proto == IPPROTO_TCP && offset == 0 && 1827 (TCP(ulp)->th_flags & 1828 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 1829 break; 1830 1831 case O_ALTQ: { 1832 struct pf_mtag *at; 1833 struct m_tag *mtag; 1834 ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; 1835 1836 /* 1837 * ALTQ uses mbuf tags from another 1838 * packet filtering system - pf(4). 1839 * We allocate a tag in its format 1840 * and fill it in, pretending to be pf(4). 1841 */ 1842 match = 1; 1843 at = pf_find_mtag(m); 1844 if (at != NULL && at->qid != 0) 1845 break; 1846 mtag = m_tag_get(PACKET_TAG_PF, 1847 sizeof(struct pf_mtag), M_NOWAIT | M_ZERO); 1848 if (mtag == NULL) { 1849 /* 1850 * Let the packet fall back to the 1851 * default ALTQ. 
1852 */ 1853 break; 1854 } 1855 m_tag_prepend(m, mtag); 1856 at = (struct pf_mtag *)(mtag + 1); 1857 at->qid = altq->qid; 1858 at->hdr = ip; 1859 break; 1860 } 1861 1862 case O_LOG: 1863 ipfw_log(chain, f, hlen, args, m, 1864 oif, offset | ip6f_mf, tablearg, ip); 1865 match = 1; 1866 break; 1867 1868 case O_PROB: 1869 match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); 1870 break; 1871 1872 case O_VERREVPATH: 1873 /* Outgoing packets automatically pass/match */ 1874 match = ((oif != NULL) || 1875 (m->m_pkthdr.rcvif == NULL) || 1876 ( 1877 #ifdef INET6 1878 is_ipv6 ? 1879 verify_path6(&(args->f_id.src_ip6), 1880 m->m_pkthdr.rcvif, args->f_id.fib) : 1881 #endif 1882 verify_path(src_ip, m->m_pkthdr.rcvif, 1883 args->f_id.fib))); 1884 break; 1885 1886 case O_VERSRCREACH: 1887 /* Outgoing packets automatically pass/match */ 1888 match = (hlen > 0 && ((oif != NULL) || 1889 #ifdef INET6 1890 is_ipv6 ? 1891 verify_path6(&(args->f_id.src_ip6), 1892 NULL, args->f_id.fib) : 1893 #endif 1894 verify_path(src_ip, NULL, args->f_id.fib))); 1895 break; 1896 1897 case O_ANTISPOOF: 1898 /* Outgoing packets automatically pass/match */ 1899 if (oif == NULL && hlen > 0 && 1900 ( (is_ipv4 && in_localaddr(src_ip)) 1901 #ifdef INET6 1902 || (is_ipv6 && 1903 in6_localaddr(&(args->f_id.src_ip6))) 1904 #endif 1905 )) 1906 match = 1907 #ifdef INET6 1908 is_ipv6 ? 
verify_path6( 1909 &(args->f_id.src_ip6), 1910 m->m_pkthdr.rcvif, 1911 args->f_id.fib) : 1912 #endif 1913 verify_path(src_ip, 1914 m->m_pkthdr.rcvif, 1915 args->f_id.fib); 1916 else 1917 match = 1; 1918 break; 1919 1920 case O_IPSEC: 1921 #ifdef IPSEC 1922 match = (m_tag_find(m, 1923 PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); 1924 #endif 1925 /* otherwise no match */ 1926 break; 1927 1928 #ifdef INET6 1929 case O_IP6_SRC: 1930 match = is_ipv6 && 1931 IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6, 1932 &((ipfw_insn_ip6 *)cmd)->addr6); 1933 break; 1934 1935 case O_IP6_DST: 1936 match = is_ipv6 && 1937 IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6, 1938 &((ipfw_insn_ip6 *)cmd)->addr6); 1939 break; 1940 case O_IP6_SRC_MASK: 1941 case O_IP6_DST_MASK: 1942 if (is_ipv6) { 1943 int i = cmdlen - 1; 1944 struct in6_addr p; 1945 struct in6_addr *d = 1946 &((ipfw_insn_ip6 *)cmd)->addr6; 1947 1948 for (; !match && i > 0; d += 2, 1949 i -= F_INSN_SIZE(struct in6_addr) 1950 * 2) { 1951 p = (cmd->opcode == 1952 O_IP6_SRC_MASK) ? 1953 args->f_id.src_ip6: 1954 args->f_id.dst_ip6; 1955 APPLY_MASK(&p, &d[1]); 1956 match = 1957 IN6_ARE_ADDR_EQUAL(&d[0], 1958 &p); 1959 } 1960 } 1961 break; 1962 1963 case O_FLOW6ID: 1964 match = is_ipv6 && 1965 flow6id_match(args->f_id.flow_id6, 1966 (ipfw_insn_u32 *) cmd); 1967 break; 1968 1969 case O_EXT_HDR: 1970 match = is_ipv6 && 1971 (ext_hd & ((ipfw_insn *) cmd)->arg1); 1972 break; 1973 1974 case O_IP6: 1975 match = is_ipv6; 1976 break; 1977 #endif 1978 1979 case O_IP4: 1980 match = is_ipv4; 1981 break; 1982 1983 case O_TAG: { 1984 struct m_tag *mtag; 1985 uint32_t tag = TARG(cmd->arg1, tag); 1986 1987 /* Packet is already tagged with this tag? */ 1988 mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); 1989 1990 /* We have `untag' action when F_NOT flag is 1991 * present. And we must remove this mtag from 1992 * mbuf and reset `match' to zero (`match' will 1993 * be inversed later). 1994 * Otherwise we should allocate new mtag and 1995 * push it into mbuf. 
1996 */ 1997 if (cmd->len & F_NOT) { /* `untag' action */ 1998 if (mtag != NULL) 1999 m_tag_delete(m, mtag); 2000 match = 0; 2001 } else { 2002 if (mtag == NULL) { 2003 mtag = m_tag_alloc( MTAG_IPFW, 2004 tag, 0, M_NOWAIT); 2005 if (mtag != NULL) 2006 m_tag_prepend(m, mtag); 2007 } 2008 match = 1; 2009 } 2010 break; 2011 } 2012 2013 case O_FIB: /* try match the specified fib */ 2014 if (args->f_id.fib == cmd->arg1) 2015 match = 1; 2016 break; 2017 2018 case O_SOCKARG: { 2019 #ifndef USERSPACE /* not supported in userspace */ 2020 struct inpcb *inp = args->inp; 2021 struct inpcbinfo *pi; 2022 2023 if (is_ipv6) /* XXX can we remove this ? */ 2024 break; 2025 2026 if (proto == IPPROTO_TCP) 2027 pi = &V_tcbinfo; 2028 else if (proto == IPPROTO_UDP) 2029 pi = &V_udbinfo; 2030 else 2031 break; 2032 2033 /* 2034 * XXXRW: so_user_cookie should almost 2035 * certainly be inp_user_cookie? 2036 */ 2037 2038 /* For incomming packet, lookup up the 2039 inpcb using the src/dest ip/port tuple */ 2040 if (inp == NULL) { 2041 inp = in_pcblookup(pi, 2042 src_ip, htons(src_port), 2043 dst_ip, htons(dst_port), 2044 INPLOOKUP_RLOCKPCB, NULL); 2045 if (inp != NULL) { 2046 tablearg = 2047 inp->inp_socket->so_user_cookie; 2048 if (tablearg) 2049 match = 1; 2050 INP_RUNLOCK(inp); 2051 } 2052 } else { 2053 if (inp->inp_socket) { 2054 tablearg = 2055 inp->inp_socket->so_user_cookie; 2056 if (tablearg) 2057 match = 1; 2058 } 2059 } 2060 #endif /* !USERSPACE */ 2061 break; 2062 } 2063 2064 case O_TAGGED: { 2065 struct m_tag *mtag; 2066 uint32_t tag = TARG(cmd->arg1, tag); 2067 2068 if (cmdlen == 1) { 2069 match = m_tag_locate(m, MTAG_IPFW, 2070 tag, NULL) != NULL; 2071 break; 2072 } 2073 2074 /* we have ranges */ 2075 for (mtag = m_tag_first(m); 2076 mtag != NULL && !match; 2077 mtag = m_tag_next(m, mtag)) { 2078 uint16_t *p; 2079 int i; 2080 2081 if (mtag->m_tag_cookie != MTAG_IPFW) 2082 continue; 2083 2084 p = ((ipfw_insn_u16 *)cmd)->ports; 2085 i = cmdlen - 1; 2086 for(; !match && i > 0; 
i--, p += 2) 2087 match = 2088 mtag->m_tag_id >= p[0] && 2089 mtag->m_tag_id <= p[1]; 2090 } 2091 break; 2092 } 2093 2094 /* 2095 * The second set of opcodes represents 'actions', 2096 * i.e. the terminal part of a rule once the packet 2097 * matches all previous patterns. 2098 * Typically there is only one action for each rule, 2099 * and the opcode is stored at the end of the rule 2100 * (but there are exceptions -- see below). 2101 * 2102 * In general, here we set retval and terminate the 2103 * outer loop (would be a 'break 3' in some language, 2104 * but we need to set l=0, done=1) 2105 * 2106 * Exceptions: 2107 * O_COUNT and O_SKIPTO actions: 2108 * instead of terminating, we jump to the next rule 2109 * (setting l=0), or to the SKIPTO target (setting 2110 * f/f_len, cmd and l as needed), respectively. 2111 * 2112 * O_TAG, O_LOG and O_ALTQ action parameters: 2113 * perform some action and set match = 1; 2114 * 2115 * O_LIMIT and O_KEEP_STATE: these opcodes are 2116 * not real 'actions', and are stored right 2117 * before the 'action' part of the rule. 2118 * These opcodes try to install an entry in the 2119 * state tables; if successful, we continue with 2120 * the next opcode (match=1; break;), otherwise 2121 * the packet must be dropped (set retval, 2122 * break loops with l=0, done=1) 2123 * 2124 * O_PROBE_STATE and O_CHECK_STATE: these opcodes 2125 * cause a lookup of the state table, and a jump 2126 * to the 'action' part of the parent rule 2127 * if an entry is found, or 2128 * (CHECK_STATE only) a jump to the next rule if 2129 * the entry is not found. 2130 * The result of the lookup is cached so that 2131 * further instances of these opcodes become NOPs. 2132 * The jump to the next rule is done by setting 2133 * l=0, cmdlen=0. 
2134 */ 2135 case O_LIMIT: 2136 case O_KEEP_STATE: 2137 if (ipfw_install_state(chain, f, 2138 (ipfw_insn_limit *)cmd, args, tablearg)) { 2139 /* error or limit violation */ 2140 retval = IP_FW_DENY; 2141 l = 0; /* exit inner loop */ 2142 done = 1; /* exit outer loop */ 2143 } 2144 match = 1; 2145 break; 2146 2147 case O_PROBE_STATE: 2148 case O_CHECK_STATE: 2149 /* 2150 * dynamic rules are checked at the first 2151 * keep-state or check-state occurrence, 2152 * with the result being stored in dyn_dir. 2153 * The compiler introduces a PROBE_STATE 2154 * instruction for us when we have a 2155 * KEEP_STATE (because PROBE_STATE needs 2156 * to be run first). 2157 */ 2158 if (dyn_dir == MATCH_UNKNOWN && 2159 (q = ipfw_lookup_dyn_rule(&args->f_id, 2160 &dyn_dir, proto == IPPROTO_TCP ? 2161 TCP(ulp) : NULL)) 2162 != NULL) { 2163 /* 2164 * Found dynamic entry, update stats 2165 * and jump to the 'action' part of 2166 * the parent rule by setting 2167 * f, cmd, l and clearing cmdlen. 2168 */ 2169 IPFW_INC_DYN_COUNTER(q, pktlen); 2170 /* XXX we would like to have f_pos 2171 * readily accessible in the dynamic 2172 * rule, instead of having to 2173 * lookup q->rule. 2174 */ 2175 f = q->rule; 2176 f_pos = ipfw_find_rule(chain, 2177 f->rulenum, f->id); 2178 cmd = ACTION_PTR(f); 2179 l = f->cmd_len - f->act_ofs; 2180 ipfw_dyn_unlock(q); 2181 cmdlen = 0; 2182 match = 1; 2183 break; 2184 } 2185 /* 2186 * Dynamic entry not found. If CHECK_STATE, 2187 * skip to next rule, if PROBE_STATE just 2188 * ignore and continue with next opcode. 
2189 */ 2190 if (cmd->opcode == O_CHECK_STATE) 2191 l = 0; /* exit inner loop */ 2192 match = 1; 2193 break; 2194 2195 case O_ACCEPT: 2196 retval = 0; /* accept */ 2197 l = 0; /* exit inner loop */ 2198 done = 1; /* exit outer loop */ 2199 break; 2200 2201 case O_PIPE: 2202 case O_QUEUE: 2203 set_match(args, f_pos, chain); 2204 args->rule.info = TARG(cmd->arg1, pipe); 2205 if (cmd->opcode == O_PIPE) 2206 args->rule.info |= IPFW_IS_PIPE; 2207 if (V_fw_one_pass) 2208 args->rule.info |= IPFW_ONEPASS; 2209 retval = IP_FW_DUMMYNET; 2210 l = 0; /* exit inner loop */ 2211 done = 1; /* exit outer loop */ 2212 break; 2213 2214 case O_DIVERT: 2215 case O_TEE: 2216 if (args->eh) /* not on layer 2 */ 2217 break; 2218 /* otherwise this is terminal */ 2219 l = 0; /* exit inner loop */ 2220 done = 1; /* exit outer loop */ 2221 retval = (cmd->opcode == O_DIVERT) ? 2222 IP_FW_DIVERT : IP_FW_TEE; 2223 set_match(args, f_pos, chain); 2224 args->rule.info = TARG(cmd->arg1, divert); 2225 break; 2226 2227 case O_COUNT: 2228 IPFW_INC_RULE_COUNTER(f, pktlen); 2229 l = 0; /* exit inner loop */ 2230 break; 2231 2232 case O_SKIPTO: 2233 IPFW_INC_RULE_COUNTER(f, pktlen); 2234 f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0); 2235 /* 2236 * Skip disabled rules, and re-enter 2237 * the inner loop with the correct 2238 * f_pos, f, l and cmd. 2239 * Also clear cmdlen and skip_or 2240 */ 2241 for (; f_pos < chain->n_rules - 1 && 2242 (V_set_disable & 2243 (1 << chain->map[f_pos]->set)); 2244 f_pos++) 2245 ; 2246 /* Re-enter the inner loop at the skipto rule. */ 2247 f = chain->map[f_pos]; 2248 l = f->cmd_len; 2249 cmd = f->cmd; 2250 match = 1; 2251 cmdlen = 0; 2252 skip_or = 0; 2253 continue; 2254 break; /* not reached */ 2255 2256 case O_CALLRETURN: { 2257 /* 2258 * Implementation of `subroutine' call/return, 2259 * in the stack carried in an mbuf tag. 
This 2260 * is different from `skipto' in that any call 2261 * address is possible (`skipto' must prevent 2262 * backward jumps to avoid endless loops). 2263 * We have `return' action when F_NOT flag is 2264 * present. The `m_tag_id' field is used as 2265 * stack pointer. 2266 */ 2267 struct m_tag *mtag; 2268 uint16_t jmpto, *stack; 2269 2270 #define IS_CALL ((cmd->len & F_NOT) == 0) 2271 #define IS_RETURN ((cmd->len & F_NOT) != 0) 2272 /* 2273 * Hand-rolled version of m_tag_locate() with 2274 * wildcard `type'. 2275 * If not already tagged, allocate new tag. 2276 */ 2277 mtag = m_tag_first(m); 2278 while (mtag != NULL) { 2279 if (mtag->m_tag_cookie == 2280 MTAG_IPFW_CALL) 2281 break; 2282 mtag = m_tag_next(m, mtag); 2283 } 2284 if (mtag == NULL && IS_CALL) { 2285 mtag = m_tag_alloc(MTAG_IPFW_CALL, 0, 2286 IPFW_CALLSTACK_SIZE * 2287 sizeof(uint16_t), M_NOWAIT); 2288 if (mtag != NULL) 2289 m_tag_prepend(m, mtag); 2290 } 2291 2292 /* 2293 * On error both `call' and `return' just 2294 * continue with next rule. 2295 */ 2296 if (IS_RETURN && (mtag == NULL || 2297 mtag->m_tag_id == 0)) { 2298 l = 0; /* exit inner loop */ 2299 break; 2300 } 2301 if (IS_CALL && (mtag == NULL || 2302 mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) { 2303 printf("ipfw: call stack error, " 2304 "go to next rule\n"); 2305 l = 0; /* exit inner loop */ 2306 break; 2307 } 2308 2309 IPFW_INC_RULE_COUNTER(f, pktlen); 2310 stack = (uint16_t *)(mtag + 1); 2311 2312 /* 2313 * The `call' action may use cached f_pos 2314 * (in f->next_rule), whose version is written 2315 * in f->next_rule. 2316 * The `return' action, however, doesn't have 2317 * fixed jump address in cmd->arg1 and can't use 2318 * cache. 
2319 */ 2320 if (IS_CALL) { 2321 stack[mtag->m_tag_id] = f->rulenum; 2322 mtag->m_tag_id++; 2323 f_pos = JUMP(chain, f, cmd->arg1, 2324 tablearg, 1); 2325 } else { /* `return' action */ 2326 mtag->m_tag_id--; 2327 jmpto = stack[mtag->m_tag_id] + 1; 2328 f_pos = ipfw_find_rule(chain, jmpto, 0); 2329 } 2330 2331 /* 2332 * Skip disabled rules, and re-enter 2333 * the inner loop with the correct 2334 * f_pos, f, l and cmd. 2335 * Also clear cmdlen and skip_or 2336 */ 2337 for (; f_pos < chain->n_rules - 1 && 2338 (V_set_disable & 2339 (1 << chain->map[f_pos]->set)); f_pos++) 2340 ; 2341 /* Re-enter the inner loop at the dest rule. */ 2342 f = chain->map[f_pos]; 2343 l = f->cmd_len; 2344 cmd = f->cmd; 2345 cmdlen = 0; 2346 skip_or = 0; 2347 continue; 2348 break; /* NOTREACHED */ 2349 } 2350 #undef IS_CALL 2351 #undef IS_RETURN 2352 2353 case O_REJECT: 2354 /* 2355 * Drop the packet and send a reject notice 2356 * if the packet is not ICMP (or is an ICMP 2357 * query), and it is not multicast/broadcast. 
2358 */ 2359 if (hlen > 0 && is_ipv4 && offset == 0 && 2360 (proto != IPPROTO_ICMP || 2361 is_icmp_query(ICMP(ulp))) && 2362 !(m->m_flags & (M_BCAST|M_MCAST)) && 2363 !IN_MULTICAST(ntohl(dst_ip.s_addr))) { 2364 send_reject(args, cmd->arg1, iplen, ip); 2365 m = args->m; 2366 } 2367 /* FALLTHROUGH */ 2368 #ifdef INET6 2369 case O_UNREACH6: 2370 if (hlen > 0 && is_ipv6 && 2371 ((offset & IP6F_OFF_MASK) == 0) && 2372 (proto != IPPROTO_ICMPV6 || 2373 (is_icmp6_query(icmp6_type) == 1)) && 2374 !(m->m_flags & (M_BCAST|M_MCAST)) && 2375 !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) { 2376 send_reject6( 2377 args, cmd->arg1, hlen, 2378 (struct ip6_hdr *)ip); 2379 m = args->m; 2380 } 2381 /* FALLTHROUGH */ 2382 #endif 2383 case O_DENY: 2384 retval = IP_FW_DENY; 2385 l = 0; /* exit inner loop */ 2386 done = 1; /* exit outer loop */ 2387 break; 2388 2389 case O_FORWARD_IP: 2390 if (args->eh) /* not valid on layer2 pkts */ 2391 break; 2392 if (q == NULL || q->rule != f || 2393 dyn_dir == MATCH_FORWARD) { 2394 struct sockaddr_in *sa; 2395 2396 sa = &(((ipfw_insn_sa *)cmd)->sa); 2397 if (sa->sin_addr.s_addr == INADDR_ANY) { 2398 #ifdef INET6 2399 /* 2400 * We use O_FORWARD_IP opcode for 2401 * fwd rule with tablearg, but tables 2402 * now support IPv6 addresses. And 2403 * when we are inspecting IPv6 packet, 2404 * we can use nh6 field from 2405 * table_value as next_hop6 address. 2406 */ 2407 if (is_ipv6) { 2408 struct sockaddr_in6 *sa6; 2409 2410 sa6 = args->next_hop6 = 2411 &args->hopstore6; 2412 sa6->sin6_family = AF_INET6; 2413 sa6->sin6_len = sizeof(*sa6); 2414 sa6->sin6_addr = TARG_VAL( 2415 chain, tablearg, nh6); 2416 /* 2417 * Set sin6_scope_id only for 2418 * link-local unicast addresses. 
2419 */ 2420 if (IN6_IS_ADDR_LINKLOCAL( 2421 &sa6->sin6_addr)) 2422 sa6->sin6_scope_id = 2423 TARG_VAL(chain, 2424 tablearg, 2425 zoneid); 2426 } else 2427 #endif 2428 { 2429 sa = args->next_hop = 2430 &args->hopstore; 2431 sa->sin_family = AF_INET; 2432 sa->sin_len = sizeof(*sa); 2433 sa->sin_addr.s_addr = htonl( 2434 TARG_VAL(chain, tablearg, 2435 nh4)); 2436 } 2437 } else { 2438 args->next_hop = sa; 2439 } 2440 } 2441 retval = IP_FW_PASS; 2442 l = 0; /* exit inner loop */ 2443 done = 1; /* exit outer loop */ 2444 break; 2445 2446 #ifdef INET6 2447 case O_FORWARD_IP6: 2448 if (args->eh) /* not valid on layer2 pkts */ 2449 break; 2450 if (q == NULL || q->rule != f || 2451 dyn_dir == MATCH_FORWARD) { 2452 struct sockaddr_in6 *sin6; 2453 2454 sin6 = &(((ipfw_insn_sa6 *)cmd)->sa); 2455 args->next_hop6 = sin6; 2456 } 2457 retval = IP_FW_PASS; 2458 l = 0; /* exit inner loop */ 2459 done = 1; /* exit outer loop */ 2460 break; 2461 #endif 2462 2463 case O_NETGRAPH: 2464 case O_NGTEE: 2465 set_match(args, f_pos, chain); 2466 args->rule.info = TARG(cmd->arg1, netgraph); 2467 if (V_fw_one_pass) 2468 args->rule.info |= IPFW_ONEPASS; 2469 retval = (cmd->opcode == O_NETGRAPH) ? 
2470 IP_FW_NETGRAPH : IP_FW_NGTEE; 2471 l = 0; /* exit inner loop */ 2472 done = 1; /* exit outer loop */ 2473 break; 2474 2475 case O_SETFIB: { 2476 uint32_t fib; 2477 2478 IPFW_INC_RULE_COUNTER(f, pktlen); 2479 fib = TARG(cmd->arg1, fib) & 0x7FFFF; 2480 if (fib >= rt_numfibs) 2481 fib = 0; 2482 M_SETFIB(m, fib); 2483 args->f_id.fib = fib; 2484 l = 0; /* exit inner loop */ 2485 break; 2486 } 2487 2488 case O_SETDSCP: { 2489 uint16_t code; 2490 2491 code = TARG(cmd->arg1, dscp) & 0x3F; 2492 l = 0; /* exit inner loop */ 2493 if (is_ipv4) { 2494 uint16_t old; 2495 2496 old = *(uint16_t *)ip; 2497 ip->ip_tos = (code << 2) | 2498 (ip->ip_tos & 0x03); 2499 ip->ip_sum = cksum_adjust(ip->ip_sum, 2500 old, *(uint16_t *)ip); 2501 } else if (is_ipv6) { 2502 uint8_t *v; 2503 2504 v = &((struct ip6_hdr *)ip)->ip6_vfc; 2505 *v = (*v & 0xF0) | (code >> 2); 2506 v++; 2507 *v = (*v & 0x3F) | ((code & 0x03) << 6); 2508 } else 2509 break; 2510 2511 IPFW_INC_RULE_COUNTER(f, pktlen); 2512 break; 2513 } 2514 2515 case O_NAT: 2516 l = 0; /* exit inner loop */ 2517 done = 1; /* exit outer loop */ 2518 if (!IPFW_NAT_LOADED) { 2519 retval = IP_FW_DENY; 2520 break; 2521 } 2522 2523 struct cfg_nat *t; 2524 int nat_id; 2525 2526 set_match(args, f_pos, chain); 2527 /* Check if this is 'global' nat rule */ 2528 if (cmd->arg1 == 0) { 2529 retval = ipfw_nat_ptr(args, NULL, m); 2530 break; 2531 } 2532 t = ((ipfw_insn_nat *)cmd)->nat; 2533 if (t == NULL) { 2534 nat_id = TARG(cmd->arg1, nat); 2535 t = (*lookup_nat_ptr)(&chain->nat, nat_id); 2536 2537 if (t == NULL) { 2538 retval = IP_FW_DENY; 2539 break; 2540 } 2541 if (cmd->arg1 != IP_FW_TARG) 2542 ((ipfw_insn_nat *)cmd)->nat = t; 2543 } 2544 retval = ipfw_nat_ptr(args, t, m); 2545 break; 2546 2547 case O_REASS: { 2548 int ip_off; 2549 2550 IPFW_INC_RULE_COUNTER(f, pktlen); 2551 l = 0; /* in any case exit inner loop */ 2552 ip_off = ntohs(ip->ip_off); 2553 2554 /* if not fragmented, go to next rule */ 2555 if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) 
2556 break; 2557 2558 args->m = m = ip_reass(m); 2559 2560 /* 2561 * do IP header checksum fixup. 2562 */ 2563 if (m == NULL) { /* fragment got swallowed */ 2564 retval = IP_FW_DENY; 2565 } else { /* good, packet complete */ 2566 int hlen; 2567 2568 ip = mtod(m, struct ip *); 2569 hlen = ip->ip_hl << 2; 2570 ip->ip_sum = 0; 2571 if (hlen == sizeof(struct ip)) 2572 ip->ip_sum = in_cksum_hdr(ip); 2573 else 2574 ip->ip_sum = in_cksum(m, hlen); 2575 retval = IP_FW_REASS; 2576 set_match(args, f_pos, chain); 2577 } 2578 done = 1; /* exit outer loop */ 2579 break; 2580 } 2581 2582 default: 2583 panic("-- unknown opcode %d\n", cmd->opcode); 2584 } /* end of switch() on opcodes */ 2585 /* 2586 * if we get here with l=0, then match is irrelevant. 2587 */ 2588 2589 if (cmd->len & F_NOT) 2590 match = !match; 2591 2592 if (match) { 2593 if (cmd->len & F_OR) 2594 skip_or = 1; 2595 } else { 2596 if (!(cmd->len & F_OR)) /* not an OR block, */ 2597 break; /* try next rule */ 2598 } 2599 2600 } /* end of inner loop, scan opcodes */ 2601 #undef PULLUP_LEN 2602 2603 if (done) 2604 break; 2605 2606 /* next_rule:; */ /* try next rule */ 2607 2608 } /* end of outer for, scan rules */ 2609 2610 if (done) { 2611 struct ip_fw *rule = chain->map[f_pos]; 2612 /* Update statistics */ 2613 IPFW_INC_RULE_COUNTER(rule, pktlen); 2614 } else { 2615 retval = IP_FW_DENY; 2616 printf("ipfw: ouch!, skip past end of rules, denying packet\n"); 2617 } 2618 IPFW_PF_RUNLOCK(chain); 2619 #ifdef __FreeBSD__ 2620 if (ucred_cache != NULL) 2621 crfree(ucred_cache); 2622 #endif 2623 return (retval); 2624 2625 pullup_failed: 2626 if (V_fw_verbose) 2627 printf("ipfw: pullup failed\n"); 2628 return (IP_FW_DENY); 2629 } 2630 2631 /* 2632 * Set maximum number of tables that can be used in given VNET ipfw instance. 
2633 */ 2634 #ifdef SYSCTL_NODE 2635 static int 2636 sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) 2637 { 2638 int error; 2639 unsigned int ntables; 2640 2641 ntables = V_fw_tables_max; 2642 2643 error = sysctl_handle_int(oidp, &ntables, 0, req); 2644 /* Read operation or some error */ 2645 if ((error != 0) || (req->newptr == NULL)) 2646 return (error); 2647 2648 return (ipfw_resize_tables(&V_layer3_chain, ntables)); 2649 } 2650 2651 /* 2652 * Switches table namespace between global and per-set. 2653 */ 2654 static int 2655 sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS) 2656 { 2657 int error; 2658 unsigned int sets; 2659 2660 sets = V_fw_tables_sets; 2661 2662 error = sysctl_handle_int(oidp, &sets, 0, req); 2663 /* Read operation or some error */ 2664 if ((error != 0) || (req->newptr == NULL)) 2665 return (error); 2666 2667 return (ipfw_switch_tables_namespace(&V_layer3_chain, sets)); 2668 } 2669 #endif 2670 2671 /* 2672 * Module and VNET glue 2673 */ 2674 2675 /* 2676 * Stuff that must be initialised only on boot or module load 2677 */ 2678 static int 2679 ipfw_init(void) 2680 { 2681 int error = 0; 2682 2683 /* 2684 * Only print out this stuff the first time around, 2685 * when called from the sysinit code. 2686 */ 2687 printf("ipfw2 " 2688 #ifdef INET6 2689 "(+ipv6) " 2690 #endif 2691 "initialized, divert %s, nat %s, " 2692 "default to %s, logging ", 2693 #ifdef IPDIVERT 2694 "enabled", 2695 #else 2696 "loadable", 2697 #endif 2698 #ifdef IPFIREWALL_NAT 2699 "enabled", 2700 #else 2701 "loadable", 2702 #endif 2703 default_to_accept ? "accept" : "deny"); 2704 2705 /* 2706 * Note: V_xxx variables can be accessed here but the vnet specific 2707 * initializer may not have been called yet for the VIMAGE case. 2708 * Tuneables will have been processed. We will print out values for 2709 * the default vnet. 
2710 * XXX This should all be rationalized AFTER 8.0 2711 */ 2712 if (V_fw_verbose == 0) 2713 printf("disabled\n"); 2714 else if (V_verbose_limit == 0) 2715 printf("unlimited\n"); 2716 else 2717 printf("limited to %d packets/entry by default\n", 2718 V_verbose_limit); 2719 2720 /* Check user-supplied table count for validness */ 2721 if (default_fw_tables > IPFW_TABLES_MAX) 2722 default_fw_tables = IPFW_TABLES_MAX; 2723 2724 ipfw_init_sopt_handler(); 2725 ipfw_log_bpf(1); /* init */ 2726 ipfw_iface_init(); 2727 return (error); 2728 } 2729 2730 /* 2731 * Called for the removal of the last instance only on module unload. 2732 */ 2733 static void 2734 ipfw_destroy(void) 2735 { 2736 2737 ipfw_iface_destroy(); 2738 ipfw_log_bpf(0); /* uninit */ 2739 ipfw_destroy_sopt_handler(); 2740 printf("IP firewall unloaded\n"); 2741 } 2742 2743 /* 2744 * Stuff that must be initialized for every instance 2745 * (including the first of course). 2746 */ 2747 static int 2748 vnet_ipfw_init(const void *unused) 2749 { 2750 int error, first; 2751 struct ip_fw *rule = NULL; 2752 struct ip_fw_chain *chain; 2753 2754 chain = &V_layer3_chain; 2755 2756 first = IS_DEFAULT_VNET(curvnet) ? 
1 : 0; 2757 2758 /* First set up some values that are compile time options */ 2759 V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ 2760 V_fw_deny_unknown_exthdrs = 1; 2761 #ifdef IPFIREWALL_VERBOSE 2762 V_fw_verbose = 1; 2763 #endif 2764 #ifdef IPFIREWALL_VERBOSE_LIMIT 2765 V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 2766 #endif 2767 #ifdef IPFIREWALL_NAT 2768 LIST_INIT(&chain->nat); 2769 #endif 2770 2771 /* Init shared services hash table */ 2772 ipfw_init_srv(chain); 2773 2774 ipfw_init_obj_rewriter(); 2775 ipfw_init_counters(); 2776 /* insert the default rule and create the initial map */ 2777 chain->n_rules = 1; 2778 chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO); 2779 rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw)); 2780 2781 /* Set initial number of tables */ 2782 V_fw_tables_max = default_fw_tables; 2783 error = ipfw_init_tables(chain, first); 2784 if (error) { 2785 printf("ipfw2: setting up tables failed\n"); 2786 free(chain->map, M_IPFW); 2787 free(rule, M_IPFW); 2788 return (ENOSPC); 2789 } 2790 2791 /* fill and insert the default rule */ 2792 rule->act_ofs = 0; 2793 rule->rulenum = IPFW_DEFAULT_RULE; 2794 rule->cmd_len = 1; 2795 rule->set = RESVD_SET; 2796 rule->cmd[0].len = 1; 2797 rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY; 2798 chain->default_rule = chain->map[0] = rule; 2799 chain->id = rule->id = 1; 2800 /* Pre-calculate rules length for legacy dump format */ 2801 chain->static_len = sizeof(struct ip_fw_rule0); 2802 2803 IPFW_LOCK_INIT(chain); 2804 ipfw_dyn_init(chain); 2805 #ifdef LINEAR_SKIPTO 2806 ipfw_init_skipto_cache(chain); 2807 #endif 2808 2809 /* First set up some values that are compile time options */ 2810 V_ipfw_vnet_ready = 1; /* Open for business */ 2811 2812 /* 2813 * Hook the sockopt handler and pfil hooks for ipv4 and ipv6. 2814 * Even if the latter two fail we still keep the module alive 2815 * because the sockopt and layer2 paths are still useful. 
2816 * ipfw[6]_hook return 0 on success, ENOENT on failure, 2817 * so we can ignore the exact return value and just set a flag. 2818 * 2819 * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so 2820 * changes in the underlying (per-vnet) variables trigger 2821 * immediate hook()/unhook() calls. 2822 * In layer2 we have the same behaviour, except that V_ether_ipfw 2823 * is checked on each packet because there are no pfil hooks. 2824 */ 2825 V_ip_fw_ctl_ptr = ipfw_ctl3; 2826 error = ipfw_attach_hooks(1); 2827 return (error); 2828 } 2829 2830 /* 2831 * Called for the removal of each instance. 2832 */ 2833 static int 2834 vnet_ipfw_uninit(const void *unused) 2835 { 2836 struct ip_fw *reap; 2837 struct ip_fw_chain *chain = &V_layer3_chain; 2838 int i, last; 2839 2840 V_ipfw_vnet_ready = 0; /* tell new callers to go away */ 2841 /* 2842 * disconnect from ipv4, ipv6, layer2 and sockopt. 2843 * Then grab, release and grab again the WLOCK so we make 2844 * sure the update is propagated and nobody will be in. 2845 */ 2846 (void)ipfw_attach_hooks(0 /* detach */); 2847 V_ip_fw_ctl_ptr = NULL; 2848 2849 last = IS_DEFAULT_VNET(curvnet) ? 
1 : 0; 2850 2851 IPFW_UH_WLOCK(chain); 2852 IPFW_UH_WUNLOCK(chain); 2853 IPFW_UH_WLOCK(chain); 2854 2855 IPFW_WLOCK(chain); 2856 ipfw_dyn_uninit(0); /* run the callout_drain */ 2857 IPFW_WUNLOCK(chain); 2858 2859 reap = NULL; 2860 IPFW_WLOCK(chain); 2861 for (i = 0; i < chain->n_rules; i++) 2862 ipfw_reap_add(chain, &reap, chain->map[i]); 2863 free(chain->map, M_IPFW); 2864 #ifdef LINEAR_SKIPTO 2865 ipfw_destroy_skipto_cache(chain); 2866 #endif 2867 IPFW_WUNLOCK(chain); 2868 IPFW_UH_WUNLOCK(chain); 2869 ipfw_destroy_tables(chain, last); 2870 if (reap != NULL) 2871 ipfw_reap_rules(reap); 2872 vnet_ipfw_iface_destroy(chain); 2873 ipfw_destroy_srv(chain); 2874 IPFW_LOCK_DESTROY(chain); 2875 ipfw_dyn_uninit(1); /* free the remaining parts */ 2876 ipfw_destroy_counters(); 2877 ipfw_destroy_obj_rewriter(); 2878 return (0); 2879 } 2880 2881 /* 2882 * Module event handler. 2883 * In general we have the choice of handling most of these events by the 2884 * event handler or by the (VNET_)SYS(UN)INIT handlers. I have chosen to 2885 * use the SYSINIT handlers as they are more capable of expressing the 2886 * flow of control during module and vnet operations, so this is just 2887 * a skeleton. Note there is no SYSINIT equivalent of the module 2888 * SHUTDOWN handler, but we don't have anything to do in that case anyhow. 2889 */ 2890 static int 2891 ipfw_modevent(module_t mod, int type, void *unused) 2892 { 2893 int err = 0; 2894 2895 switch (type) { 2896 case MOD_LOAD: 2897 /* Called once at module load or 2898 * system boot if compiled in. */ 2899 break; 2900 case MOD_QUIESCE: 2901 /* Called before unload. May veto unloading. */ 2902 break; 2903 case MOD_UNLOAD: 2904 /* Called during unload. */ 2905 break; 2906 case MOD_SHUTDOWN: 2907 /* Called during system shutdown. 
*/ 2908 break; 2909 default: 2910 err = EOPNOTSUPP; 2911 break; 2912 } 2913 return err; 2914 } 2915 2916 static moduledata_t ipfwmod = { 2917 "ipfw", 2918 ipfw_modevent, 2919 0 2920 }; 2921 2922 /* Define startup order. */ 2923 #define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN 2924 #define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */ 2925 #define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */ 2926 #define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */ 2927 2928 DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER); 2929 FEATURE(ipfw_ctl3, "ipfw new sockopt calls"); 2930 MODULE_VERSION(ipfw, 3); 2931 /* should declare some dependencies here */ 2932 2933 /* 2934 * Starting up. Done in order after ipfwmod() has been called. 2935 * VNET_SYSINIT is also called for each existing vnet and each new vnet. 2936 */ 2937 SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, 2938 ipfw_init, NULL); 2939 VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER, 2940 vnet_ipfw_init, NULL); 2941 2942 /* 2943 * Closing up shop. These are done in REVERSE ORDER, but still 2944 * after ipfwmod() has been called. Not called on reboot. 2945 * VNET_SYSUNINIT is also called for each exiting vnet as it exits. 2946 * or when the module is unloaded. 2947 */ 2948 SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, 2949 ipfw_destroy, NULL); 2950 VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER, 2951 vnet_ipfw_uninit, NULL); 2952 /* end of file */ 2953