1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Handle firewalling
4 * Linux ethernet bridge
5 *
6 * Authors:
7 * Lennert Buytenhek <buytenh@gnu.org>
8 * Bart De Schuymer <bdschuym@pandora.be>
9 *
10 * Lennert dedicates this file to Kerstin Wurdinger.
11 */
12
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/slab.h>
16 #include <linux/ip.h>
17 #include <linux/netdevice.h>
18 #include <linux/skbuff.h>
19 #include <linux/if_arp.h>
20 #include <linux/if_ether.h>
21 #include <linux/if_vlan.h>
22 #include <linux/if_pppox.h>
23 #include <linux/ppp_defs.h>
24 #include <linux/netfilter_bridge.h>
25 #include <uapi/linux/netfilter_bridge.h>
26 #include <linux/netfilter_ipv4.h>
27 #include <linux/netfilter_ipv6.h>
28 #include <linux/netfilter_arp.h>
29 #include <linux/in_route.h>
30 #include <linux/rculist.h>
31 #include <linux/inetdevice.h>
32
33 #include <net/ip.h>
34 #include <net/ipv6.h>
35 #include <net/ip6_route.h>
36 #include <net/addrconf.h>
37 #include <net/dst_metadata.h>
38 #include <net/route.h>
39 #include <net/netfilter/br_netfilter.h>
40 #include <net/netns/generic.h>
41 #include <net/inet_dscp.h>
42
43 #include <linux/uaccess.h>
44 #include "br_private.h"
45 #ifdef CONFIG_SYSCTL
46 #include <linux/sysctl.h>
47 #endif
48
49 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
50 #include <net/netfilter/nf_conntrack_core.h>
51 #endif
52
53 static unsigned int brnf_net_id __read_mostly;
54
55 struct brnf_net {
56 bool enabled;
57
58 #ifdef CONFIG_SYSCTL
59 struct ctl_table_header *ctl_hdr;
60 #endif
61
62 /* default value is 1 */
63 int call_iptables;
64 int call_ip6tables;
65 int call_arptables;
66
67 /* default value is 0 */
68 int filter_vlan_tagged;
69 int filter_pppoe_tagged;
70 int pass_vlan_indev;
71 };
72
/* Protocol classifiers for frames for which skb_vlan_tag_present() is false.
 *
 * The argument is parenthesised at the member access so that any
 * pointer-valued expression may be passed.  Note that the argument is
 * evaluated more than once; do not pass expressions with side effects.
 */
#define IS_IP(skb) \
	(!skb_vlan_tag_present(skb) && (skb)->protocol == htons(ETH_P_IP))

#define IS_IPV6(skb) \
	(!skb_vlan_tag_present(skb) && (skb)->protocol == htons(ETH_P_IPV6))

#define IS_ARP(skb) \
	(!skb_vlan_tag_present(skb) && (skb)->protocol == htons(ETH_P_ARP))
81
vlan_proto(const struct sk_buff * skb)82 static inline __be16 vlan_proto(const struct sk_buff *skb)
83 {
84 if (skb_vlan_tag_present(skb))
85 return skb->protocol;
86 else if (skb->protocol == htons(ETH_P_8021Q))
87 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
88 else
89 return 0;
90 }
91
is_vlan_ip(const struct sk_buff * skb,const struct net * net)92 static inline bool is_vlan_ip(const struct sk_buff *skb, const struct net *net)
93 {
94 struct brnf_net *brnet = net_generic(net, brnf_net_id);
95
96 return vlan_proto(skb) == htons(ETH_P_IP) && brnet->filter_vlan_tagged;
97 }
98
is_vlan_ipv6(const struct sk_buff * skb,const struct net * net)99 static inline bool is_vlan_ipv6(const struct sk_buff *skb,
100 const struct net *net)
101 {
102 struct brnf_net *brnet = net_generic(net, brnf_net_id);
103
104 return vlan_proto(skb) == htons(ETH_P_IPV6) &&
105 brnet->filter_vlan_tagged;
106 }
107
is_vlan_arp(const struct sk_buff * skb,const struct net * net)108 static inline bool is_vlan_arp(const struct sk_buff *skb, const struct net *net)
109 {
110 struct brnf_net *brnet = net_generic(net, brnf_net_id);
111
112 return vlan_proto(skb) == htons(ETH_P_ARP) && brnet->filter_vlan_tagged;
113 }
114
pppoe_proto(const struct sk_buff * skb)115 static inline __be16 pppoe_proto(const struct sk_buff *skb)
116 {
117 return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
118 sizeof(struct pppoe_hdr)));
119 }
120
is_pppoe_ip(const struct sk_buff * skb,const struct net * net)121 static inline bool is_pppoe_ip(const struct sk_buff *skb, const struct net *net)
122 {
123 struct brnf_net *brnet = net_generic(net, brnf_net_id);
124
125 return skb->protocol == htons(ETH_P_PPP_SES) &&
126 pppoe_proto(skb) == htons(PPP_IP) && brnet->filter_pppoe_tagged;
127 }
128
is_pppoe_ipv6(const struct sk_buff * skb,const struct net * net)129 static inline bool is_pppoe_ipv6(const struct sk_buff *skb,
130 const struct net *net)
131 {
132 struct brnf_net *brnet = net_generic(net, brnf_net_id);
133
134 return skb->protocol == htons(ETH_P_PPP_SES) &&
135 pppoe_proto(skb) == htons(PPP_IPV6) &&
136 brnet->filter_pppoe_tagged;
137 }
138
139 /* largest possible L2 header, see br_nf_dev_queue_xmit() */
140 #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
141
142 struct brnf_frag_data {
143 local_lock_t bh_lock;
144 char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
145 u8 encap_size;
146 u8 size;
147 u16 vlan_tci;
148 __be16 vlan_proto;
149 };
150
151 static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage) = {
152 .bh_lock = INIT_LOCAL_LOCK(bh_lock),
153 };
154
nf_bridge_info_free(struct sk_buff * skb)155 static void nf_bridge_info_free(struct sk_buff *skb)
156 {
157 skb_ext_del(skb, SKB_EXT_BRIDGE_NF);
158 }
159
bridge_parent(const struct net_device * dev)160 static inline struct net_device *bridge_parent(const struct net_device *dev)
161 {
162 struct net_bridge_port *port;
163
164 port = br_port_get_rcu(dev);
165 return port ? port->br->dev : NULL;
166 }
167
nf_bridge_unshare(struct sk_buff * skb)168 static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
169 {
170 return skb_ext_add(skb, SKB_EXT_BRIDGE_NF);
171 }
172
nf_bridge_encap_header_len(const struct sk_buff * skb)173 unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
174 {
175 switch (skb->protocol) {
176 case __cpu_to_be16(ETH_P_8021Q):
177 return VLAN_HLEN;
178 case __cpu_to_be16(ETH_P_PPP_SES):
179 return PPPOE_SES_HLEN;
180 default:
181 return 0;
182 }
183 }
184
nf_bridge_pull_encap_header(struct sk_buff * skb)185 static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
186 {
187 unsigned int len = nf_bridge_encap_header_len(skb);
188
189 skb_pull(skb, len);
190 skb->network_header += len;
191 }
192
nf_bridge_pull_encap_header_rcsum(struct sk_buff * skb)193 static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
194 {
195 unsigned int len = nf_bridge_encap_header_len(skb);
196
197 skb_pull_rcsum(skb, len);
198 skb->network_header += len;
199 }
200
201 /* When handing a packet over to the IP layer
202 * check whether we have a skb that is in the
203 * expected format
204 */
205
br_validate_ipv4(struct net * net,struct sk_buff * skb)206 static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
207 {
208 const struct iphdr *iph;
209 u32 len;
210
211 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
212 goto inhdr_error;
213
214 iph = ip_hdr(skb);
215
216 /* Basic sanity checks */
217 if (iph->ihl < 5 || iph->version != 4)
218 goto inhdr_error;
219
220 if (!pskb_may_pull(skb, iph->ihl*4))
221 goto inhdr_error;
222
223 iph = ip_hdr(skb);
224 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
225 goto csum_error;
226
227 len = skb_ip_totlen(skb);
228 if (skb->len < len) {
229 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
230 goto drop;
231 } else if (len < (iph->ihl*4))
232 goto inhdr_error;
233
234 if (pskb_trim_rcsum(skb, len)) {
235 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
236 goto drop;
237 }
238
239 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
240 /* We should really parse IP options here but until
241 * somebody who actually uses IP options complains to
242 * us we'll just silently ignore the options because
243 * we're lazy!
244 */
245 return 0;
246
247 csum_error:
248 __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
249 inhdr_error:
250 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
251 drop:
252 return -1;
253 }
254
nf_bridge_update_protocol(struct sk_buff * skb)255 void nf_bridge_update_protocol(struct sk_buff *skb)
256 {
257 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
258
259 switch (nf_bridge->orig_proto) {
260 case BRNF_PROTO_8021Q:
261 skb->protocol = htons(ETH_P_8021Q);
262 break;
263 case BRNF_PROTO_PPPOE:
264 skb->protocol = htons(ETH_P_PPP_SES);
265 break;
266 case BRNF_PROTO_UNCHANGED:
267 break;
268 }
269 }
270
271 /* Obtain the correct destination MAC address, while preserving the original
272 * source MAC address. If we already know this address, we just copy it. If we
273 * don't, we use the neighbour framework to find out. In both cases, we make
274 * sure that br_handle_frame_finish() is called afterwards.
275 */
br_nf_pre_routing_finish_bridge(struct net * net,struct sock * sk,struct sk_buff * skb)276 int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb)
277 {
278 struct neighbour *neigh;
279 struct dst_entry *dst;
280
281 skb->dev = bridge_parent(skb->dev);
282 if (!skb->dev)
283 goto free_skb;
284 dst = skb_dst(skb);
285 neigh = dst_neigh_lookup_skb(dst, skb);
286 if (neigh) {
287 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
288 int ret;
289
290 if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) &&
291 READ_ONCE(neigh->hh.hh_len)) {
292 struct net_device *br_indev;
293
294 br_indev = nf_bridge_get_physindev(skb, net);
295 if (!br_indev) {
296 neigh_release(neigh);
297 goto free_skb;
298 }
299
300 if (neigh_hh_bridge(&neigh->hh, skb)) {
301 neigh_release(neigh);
302 goto free_skb;
303 }
304
305 skb->dev = br_indev;
306
307 ret = br_handle_frame_finish(net, sk, skb);
308 } else {
309 /* the neighbour function below overwrites the complete
310 * MAC header, so we save the Ethernet source address and
311 * protocol number.
312 */
313 skb_copy_from_linear_data_offset(skb,
314 -(ETH_HLEN-ETH_ALEN),
315 nf_bridge->neigh_header,
316 ETH_HLEN-ETH_ALEN);
317 /* tell br_dev_xmit to continue with forwarding */
318 nf_bridge->bridged_dnat = 1;
319 /* FIXME Need to refragment */
320 ret = READ_ONCE(neigh->output)(neigh, skb);
321 }
322 neigh_release(neigh);
323 return ret;
324 }
325 free_skb:
326 kfree_skb(skb);
327 return 0;
328 }
329
330 static inline bool
br_nf_ipv4_daddr_was_changed(const struct sk_buff * skb,const struct nf_bridge_info * nf_bridge)331 br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
332 const struct nf_bridge_info *nf_bridge)
333 {
334 return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
335 }
336
337 /* This requires some explaining. If DNAT has taken place,
338 * we will need to fix up the destination Ethernet address.
339 * This is also true when SNAT takes place (for the reply direction).
340 *
341 * There are two cases to consider:
342 * 1. The packet was DNAT'ed to a device in the same bridge
343 * port group as it was received on. We can still bridge
344 * the packet.
345 * 2. The packet was DNAT'ed to a different device, either
346 * a non-bridged device or another bridge port group.
347 * The packet will need to be routed.
348 *
349 * The correct way of distinguishing between these two cases is to
350 * call ip_route_input() and to look at skb->dst->dev, which is
351 * changed to the destination device if ip_route_input() succeeds.
352 *
353 * Let's first consider the case that ip_route_input() succeeds:
354 *
355 * If the output device equals the logical bridge device the packet
356 * came in on, we can consider this bridging. The corresponding MAC
357 * address will be obtained in br_nf_pre_routing_finish_bridge.
358 * Otherwise, the packet is considered to be routed and we just
359 * change the destination MAC address so that the packet will
360 * later be passed up to the IP stack to be routed. For a redirected
361 * packet, ip_route_input() will give back the localhost as output device,
362 * which differs from the bridge device.
363 *
 * Let's now consider the case that ip_route_input() fails:
 *
 * This can be because the destination address is martian, or for any other
 * reason reported by ip_route_input(). In all of these cases the packet is
 * dropped with the drop reason returned by ip_route_input().
 * Note that there is no fallback to ip_route_output_key() in this function:
 * a DNAT'ed packet for which ip_route_input() fails is never bridged, even
 * if an output route through the logical bridge device would exist.
375 */
br_nf_pre_routing_finish(struct net * net,struct sock * sk,struct sk_buff * skb)376 static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
377 {
378 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
379 struct net_device *dev = skb->dev, *br_indev;
380 const struct iphdr *iph = ip_hdr(skb);
381 enum skb_drop_reason reason;
382 struct rtable *rt;
383
384 br_indev = nf_bridge_get_physindev(skb, net);
385 if (!br_indev) {
386 kfree_skb(skb);
387 return 0;
388 }
389
390 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
391
392 if (nf_bridge->pkt_otherhost) {
393 skb->pkt_type = PACKET_OTHERHOST;
394 nf_bridge->pkt_otherhost = false;
395 }
396 nf_bridge->in_prerouting = 0;
397 if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
398 reason = ip_route_input(skb, iph->daddr, iph->saddr,
399 ip4h_dscp(iph), dev);
400 if (reason) {
401 kfree_skb_reason(skb, reason);
402 return 0;
403 } else {
404 if (skb_dst(skb)->dev == dev) {
405 skb->dev = br_indev;
406 nf_bridge_update_protocol(skb);
407 nf_bridge_push_encap_header(skb);
408 br_nf_hook_thresh(NF_BR_PRE_ROUTING,
409 net, sk, skb, skb->dev,
410 NULL,
411 br_nf_pre_routing_finish_bridge);
412 return 0;
413 }
414 ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
415 skb->pkt_type = PACKET_HOST;
416 }
417 } else {
418 rt = bridge_parent_rtable(br_indev);
419 if (!rt) {
420 kfree_skb(skb);
421 return 0;
422 }
423 skb_dst_drop(skb);
424 skb_dst_set_noref(skb, &rt->dst);
425 }
426
427 skb->dev = br_indev;
428 nf_bridge_update_protocol(skb);
429 nf_bridge_push_encap_header(skb);
430 br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
431 br_handle_frame_finish);
432 return 0;
433 }
434
brnf_get_logical_dev(struct sk_buff * skb,const struct net_device * dev,const struct net * net)435 static struct net_device *brnf_get_logical_dev(struct sk_buff *skb,
436 const struct net_device *dev,
437 const struct net *net)
438 {
439 struct net_device *vlan, *br;
440 struct brnf_net *brnet = net_generic(net, brnf_net_id);
441
442 br = bridge_parent(dev);
443
444 if (brnet->pass_vlan_indev == 0 || !skb_vlan_tag_present(skb))
445 return br;
446
447 vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto,
448 skb_vlan_tag_get(skb) & VLAN_VID_MASK);
449
450 return vlan ? vlan : br;
451 }
452
453 /* Some common code for IPv4/IPv6 */
setup_pre_routing(struct sk_buff * skb,const struct net * net)454 struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net)
455 {
456 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
457
458 if (skb->pkt_type == PACKET_OTHERHOST) {
459 skb->pkt_type = PACKET_HOST;
460 nf_bridge->pkt_otherhost = true;
461 }
462
463 nf_bridge->in_prerouting = 1;
464 nf_bridge->physinif = skb->dev->ifindex;
465 skb->dev = brnf_get_logical_dev(skb, skb->dev, net);
466
467 if (skb->protocol == htons(ETH_P_8021Q))
468 nf_bridge->orig_proto = BRNF_PROTO_8021Q;
469 else if (skb->protocol == htons(ETH_P_PPP_SES))
470 nf_bridge->orig_proto = BRNF_PROTO_PPPOE;
471
472 /* Must drop socket now because of tproxy. */
473 skb_orphan(skb);
474 return skb->dev;
475 }
476
477 /* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
478 * Replicate the checks that IPv4 does on packet reception.
479 * Set skb->dev to the bridge device (i.e. parent of the
480 * receiving device) to make netfilter happy, the REDIRECT
481 * target in particular. Save the original destination IP
482 * address to be able to detect DNAT afterwards. */
br_nf_pre_routing(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)483 static unsigned int br_nf_pre_routing(void *priv,
484 struct sk_buff *skb,
485 const struct nf_hook_state *state)
486 {
487 struct nf_bridge_info *nf_bridge;
488 struct net_bridge_port *p;
489 struct net_bridge *br;
490 __u32 len = nf_bridge_encap_header_len(skb);
491 struct brnf_net *brnet;
492
493 if (unlikely(!pskb_may_pull(skb, len)))
494 return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
495
496 p = br_port_get_rcu(state->in);
497 if (p == NULL)
498 return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
499 br = p->br;
500
501 brnet = net_generic(state->net, brnf_net_id);
502 if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
503 is_pppoe_ipv6(skb, state->net)) {
504 if (!brnet->call_ip6tables &&
505 !br_opt_get(br, BROPT_NF_CALL_IP6TABLES))
506 return NF_ACCEPT;
507 if (!ipv6_mod_enabled()) {
508 pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported.");
509 return NF_DROP_REASON(skb, SKB_DROP_REASON_IPV6DISABLED, 0);
510 }
511
512 nf_bridge_pull_encap_header_rcsum(skb);
513 return br_nf_pre_routing_ipv6(priv, skb, state);
514 }
515
516 if (!brnet->call_iptables && !br_opt_get(br, BROPT_NF_CALL_IPTABLES))
517 return NF_ACCEPT;
518
519 if (!IS_IP(skb) && !is_vlan_ip(skb, state->net) &&
520 !is_pppoe_ip(skb, state->net))
521 return NF_ACCEPT;
522
523 nf_bridge_pull_encap_header_rcsum(skb);
524
525 if (br_validate_ipv4(state->net, skb))
526 return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
527
528 if (!nf_bridge_alloc(skb))
529 return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
530 if (!setup_pre_routing(skb, state->net))
531 return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
532
533 nf_bridge = nf_bridge_info_get(skb);
534 nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
535
536 skb->protocol = htons(ETH_P_IP);
537 skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4;
538
539 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
540 skb->dev, NULL,
541 br_nf_pre_routing_finish);
542
543 return NF_STOLEN;
544 }
545
546 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
547 /* conntracks' nf_confirm logic cannot handle cloned skbs referencing
548 * the same nf_conn entry, which will happen for multicast (broadcast)
549 * Frames on bridges.
550 *
551 * Example:
552 * macvlan0
553 * br0
554 * ethX ethY
555 *
556 * ethX (or Y) receives multicast or broadcast packet containing
557 * an IP packet, not yet in conntrack table.
558 *
559 * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
 *    -> skb->_nfct now references an unconfirmed entry
561 * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
562 * interface.
563 * 3. skb gets passed up the stack.
564 * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
565 * and schedules a work queue to send them out on the lower devices.
566 *
567 * The clone skb->_nfct is not a copy, it is the same entry as the
568 * original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
569 * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
570 *
571 * The Macvlan broadcast worker and normal confirm path will race.
572 *
573 * This race will not happen if step 2 already confirmed a clone. In that
574 * case later steps perform skb_clone() with skb->_nfct already confirmed (in
575 * hash table). This works fine.
576 *
577 * But such confirmation won't happen when eb/ip/nftables rules dropped the
578 * packets before they reached the nf_confirm step in postrouting.
579 *
580 * Work around this problem by explicit confirmation of the entry at
581 * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
582 * entry.
583 *
584 */
br_nf_local_in(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)585 static unsigned int br_nf_local_in(void *priv,
586 struct sk_buff *skb,
587 const struct nf_hook_state *state)
588 {
589 bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
590 struct nf_conntrack *nfct = skb_nfct(skb);
591 const struct nf_ct_hook *ct_hook;
592 struct nf_conn *ct;
593 int ret;
594
595 if (promisc) {
596 nf_reset_ct(skb);
597 return NF_ACCEPT;
598 }
599
600 if (!nfct || skb->pkt_type == PACKET_HOST)
601 return NF_ACCEPT;
602
603 ct = container_of(nfct, struct nf_conn, ct_general);
604 if (likely(nf_ct_is_confirmed(ct)))
605 return NF_ACCEPT;
606
607 if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) {
608 nf_reset_ct(skb);
609 return NF_ACCEPT;
610 }
611
612 WARN_ON_ONCE(skb_shared(skb));
613
614 /* We can't call nf_confirm here, it would create a dependency
615 * on nf_conntrack module.
616 */
617 ct_hook = rcu_dereference(nf_ct_hook);
618 if (!ct_hook) {
619 skb->_nfct = 0ul;
620 nf_conntrack_put(nfct);
621 return NF_ACCEPT;
622 }
623
624 nf_bridge_pull_encap_header(skb);
625 ret = ct_hook->confirm(skb);
626 switch (ret & NF_VERDICT_MASK) {
627 case NF_STOLEN:
628 return NF_STOLEN;
629 default:
630 nf_bridge_push_encap_header(skb);
631 break;
632 }
633
634 return ret;
635 }
636 #endif
637
638 /* PF_BRIDGE/FORWARD *************************************************/
br_nf_forward_finish(struct net * net,struct sock * sk,struct sk_buff * skb)639 static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
640 {
641 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
642 struct net_device *in;
643
644 if (!IS_ARP(skb) && !is_vlan_arp(skb, net)) {
645
646 if (skb->protocol == htons(ETH_P_IP))
647 nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
648
649 if (skb->protocol == htons(ETH_P_IPV6))
650 nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
651
652 in = nf_bridge_get_physindev(skb, net);
653 if (!in) {
654 kfree_skb(skb);
655 return 0;
656 }
657 if (nf_bridge->pkt_otherhost) {
658 skb->pkt_type = PACKET_OTHERHOST;
659 nf_bridge->pkt_otherhost = false;
660 }
661 nf_bridge_update_protocol(skb);
662 } else {
663 in = *((struct net_device **)(skb->cb));
664 }
665 nf_bridge_push_encap_header(skb);
666
667 br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev,
668 br_forward_finish);
669 return 0;
670 }
671
672
/* Run a bridged IPv4 or IPv6 packet through the FORWARD hooks of family @pf.
 *
 * Packets without an nf_bridge extension are accepted untouched.  Otherwise
 * the extension is made private to this skb, the encapsulation header is
 * pulled, the IP header is validated and the packet is handed to
 * NF_INET_FORWARD with the logical input device and the bridge of the output
 * port as devices.
 *
 * Returns NF_ACCEPT, a drop verdict, or NF_STOLEN once the packet was handed
 * to the hooks.
 */
static unsigned int br_nf_forward_ip(struct sk_buff *skb,
				     const struct nf_hook_state *state,
				     u8 pf)
{
	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
	struct net_device *out_bridge;

	if (!nf_bridge)
		return NF_ACCEPT;

	/* Need exclusive nf_bridge_info since we might have multiple
	 * different physoutdevs.
	 */
	if (!nf_bridge_unshare(skb))
		return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);

	/* Fetch the extension again after unsharing. */
	nf_bridge = nf_bridge_info_get(skb);
	if (!nf_bridge)
		return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);

	out_bridge = bridge_parent(state->out);
	if (!out_bridge)
		return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);

	nf_bridge_pull_encap_header(skb);

	if (skb->pkt_type == PACKET_OTHERHOST) {
		nf_bridge->pkt_otherhost = true;
		skb->pkt_type = PACKET_HOST;
	}

	switch (pf) {
	case NFPROTO_IPV4:
		if (br_validate_ipv4(state->net, skb))
			return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
		IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
		skb->protocol = htons(ETH_P_IP);
		break;
	case NFPROTO_IPV6:
		if (br_validate_ipv6(state->net, skb))
			return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
		IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
		skb->protocol = htons(ETH_P_IPV6);
		break;
	default:
		WARN_ON_ONCE(1);
		return NF_DROP;
	}

	nf_bridge->physoutdev = skb->dev;

	NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
		brnf_get_logical_dev(skb, state->in, state->net),
		out_bridge, br_nf_forward_finish);

	return NF_STOLEN;
}
727
br_nf_forward_arp(struct sk_buff * skb,const struct nf_hook_state * state)728 static unsigned int br_nf_forward_arp(struct sk_buff *skb,
729 const struct nf_hook_state *state)
730 {
731 struct net_bridge_port *p;
732 struct net_bridge *br;
733 struct net_device **d = (struct net_device **)(skb->cb);
734 struct brnf_net *brnet;
735
736 p = br_port_get_rcu(state->out);
737 if (p == NULL)
738 return NF_ACCEPT;
739 br = p->br;
740
741 brnet = net_generic(state->net, brnf_net_id);
742 if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES))
743 return NF_ACCEPT;
744
745 if (is_vlan_arp(skb, state->net))
746 nf_bridge_pull_encap_header(skb);
747
748 if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
749 return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
750
751 if (arp_hdr(skb)->ar_pln != 4) {
752 if (is_vlan_arp(skb, state->net))
753 nf_bridge_push_encap_header(skb);
754 return NF_ACCEPT;
755 }
756 *d = state->in;
757 NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb,
758 state->in, state->out, br_nf_forward_finish);
759
760 return NF_STOLEN;
761 }
762
763 /* This is the 'purely bridged' case. For IP, we pass the packet to
764 * netfilter with indev and outdev set to the bridge device,
765 * but we are still able to filter on the 'real' indev/outdev
766 * because of the physdev module. For ARP, indev and outdev are the
767 * bridge ports.
768 */
br_nf_forward(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)769 static unsigned int br_nf_forward(void *priv,
770 struct sk_buff *skb,
771 const struct nf_hook_state *state)
772 {
773 if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
774 is_pppoe_ip(skb, state->net))
775 return br_nf_forward_ip(skb, state, NFPROTO_IPV4);
776 if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
777 is_pppoe_ipv6(skb, state->net))
778 return br_nf_forward_ip(skb, state, NFPROTO_IPV6);
779 if (IS_ARP(skb) || is_vlan_arp(skb, state->net))
780 return br_nf_forward_arp(skb, state);
781
782 return NF_ACCEPT;
783 }
784
br_nf_push_frag_xmit(struct net * net,struct sock * sk,struct sk_buff * skb)785 static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
786 {
787 struct brnf_frag_data *data;
788 int err;
789
790 data = this_cpu_ptr(&brnf_frag_data_storage);
791 err = skb_cow_head(skb, data->size);
792
793 if (err) {
794 kfree_skb(skb);
795 return 0;
796 }
797
798 if (data->vlan_proto)
799 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
800
801 skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
802 __skb_push(skb, data->encap_size);
803
804 nf_bridge_info_free(skb);
805 return br_dev_queue_push_xmit(net, sk, skb);
806 }
807
808 static int
br_nf_ip_fragment(struct net * net,struct sock * sk,struct sk_buff * skb,int (* output)(struct net *,struct sock *,struct sk_buff *))809 br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
810 int (*output)(struct net *, struct sock *, struct sk_buff *))
811 {
812 unsigned int mtu = ip_skb_dst_mtu(sk, skb);
813 struct iphdr *iph = ip_hdr(skb);
814
815 if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
816 (IPCB(skb)->frag_max_size &&
817 IPCB(skb)->frag_max_size > mtu))) {
818 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
819 kfree_skb(skb);
820 return -EMSGSIZE;
821 }
822
823 return ip_do_fragment(net, sk, skb, output);
824 }
825
nf_bridge_mtu_reduction(const struct sk_buff * skb)826 static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
827 {
828 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
829
830 if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
831 return PPPOE_SES_HLEN;
832 return 0;
833 }
834
br_nf_dev_queue_xmit(struct net * net,struct sock * sk,struct sk_buff * skb)835 static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
836 {
837 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
838 unsigned int mtu, mtu_reserved;
839 int ret;
840
841 mtu_reserved = nf_bridge_mtu_reduction(skb);
842 mtu = skb->dev->mtu;
843
844 if (nf_bridge->pkt_otherhost) {
845 skb->pkt_type = PACKET_OTHERHOST;
846 nf_bridge->pkt_otherhost = false;
847 }
848
849 if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
850 mtu = nf_bridge->frag_max_size;
851
852 nf_bridge_update_protocol(skb);
853 nf_bridge_push_encap_header(skb);
854
855 if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
856 nf_bridge_info_free(skb);
857 return br_dev_queue_push_xmit(net, sk, skb);
858 }
859
860 /* Fragmentation on metadata/template dst is not supported */
861 if (unlikely(!skb_valid_dst(skb)))
862 goto drop;
863
864 /* This is wrong! We should preserve the original fragment
865 * boundaries by preserving frag_list rather than refragmenting.
866 */
867 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) &&
868 skb->protocol == htons(ETH_P_IP)) {
869 struct brnf_frag_data *data;
870
871 if (br_validate_ipv4(net, skb))
872 goto drop;
873
874 IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
875
876 local_lock_nested_bh(&brnf_frag_data_storage.bh_lock);
877 data = this_cpu_ptr(&brnf_frag_data_storage);
878
879 if (skb_vlan_tag_present(skb)) {
880 data->vlan_tci = skb->vlan_tci;
881 data->vlan_proto = skb->vlan_proto;
882 } else {
883 data->vlan_proto = 0;
884 }
885
886 data->encap_size = nf_bridge_encap_header_len(skb);
887 data->size = ETH_HLEN + data->encap_size;
888
889 skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
890 data->size);
891
892 ret = br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);
893 local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock);
894 return ret;
895 }
896 if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) &&
897 skb->protocol == htons(ETH_P_IPV6)) {
898 struct brnf_frag_data *data;
899
900 if (br_validate_ipv6(net, skb))
901 goto drop;
902
903 IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
904
905 local_lock_nested_bh(&brnf_frag_data_storage.bh_lock);
906 data = this_cpu_ptr(&brnf_frag_data_storage);
907 data->encap_size = nf_bridge_encap_header_len(skb);
908 data->size = ETH_HLEN + data->encap_size;
909
910 skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
911 data->size);
912
913 ret = ip6_fragment(net, sk, skb, br_nf_push_frag_xmit);
914 local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock);
915 return ret;
916 }
917 nf_bridge_info_free(skb);
918 return br_dev_queue_push_xmit(net, sk, skb);
919 drop:
920 kfree_skb(skb);
921 return 0;
922 }
923
924 /* PF_BRIDGE/POST_ROUTING ********************************************/
br_nf_post_routing(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)925 static unsigned int br_nf_post_routing(void *priv,
926 struct sk_buff *skb,
927 const struct nf_hook_state *state)
928 {
929 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
930 struct net_device *realoutdev = bridge_parent(skb->dev);
931 u_int8_t pf;
932
933 /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in
934 * on a bridge, but was delivered locally and is now being routed:
935 *
936 * POST_ROUTING was already invoked from the ip stack.
937 */
938 if (!nf_bridge || !nf_bridge->physoutdev)
939 return NF_ACCEPT;
940
941 if (!realoutdev)
942 return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
943
944 if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
945 is_pppoe_ip(skb, state->net))
946 pf = NFPROTO_IPV4;
947 else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
948 is_pppoe_ipv6(skb, state->net))
949 pf = NFPROTO_IPV6;
950 else
951 return NF_ACCEPT;
952
953 if (skb->pkt_type == PACKET_OTHERHOST) {
954 skb->pkt_type = PACKET_HOST;
955 nf_bridge->pkt_otherhost = true;
956 }
957
958 nf_bridge_pull_encap_header(skb);
959 if (pf == NFPROTO_IPV4)
960 skb->protocol = htons(ETH_P_IP);
961 else
962 skb->protocol = htons(ETH_P_IPV6);
963
964 NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb,
965 NULL, realoutdev,
966 br_nf_dev_queue_xmit);
967
968 return NF_STOLEN;
969 }
970
971 /* IP/SABOTAGE *****************************************************/
972 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
973 * for the second time. */
ip_sabotage_in(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)974 static unsigned int ip_sabotage_in(void *priv,
975 struct sk_buff *skb,
976 const struct nf_hook_state *state)
977 {
978 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
979
980 if (nf_bridge) {
981 if (nf_bridge->sabotage_in_done)
982 return NF_ACCEPT;
983
984 if (!nf_bridge->in_prerouting &&
985 !netif_is_l3_master(skb->dev) &&
986 !netif_is_l3_slave(skb->dev)) {
987 nf_bridge->sabotage_in_done = 1;
988 state->okfn(state->net, state->sk, skb);
989 return NF_STOLEN;
990 }
991 }
992
993 return NF_ACCEPT;
994 }
995
996 /* This is called when br_netfilter has called into iptables/netfilter,
997 * and DNAT has taken place on a bridge-forwarded packet.
998 *
999 * neigh->output has created a new MAC header, with local br0 MAC
1000 * as saddr.
1001 *
1002 * This restores the original MAC saddr of the bridged packet
1003 * before invoking bridge forward logic to transmit the packet.
1004 */
br_nf_pre_routing_finish_bridge_slow(struct sk_buff * skb)1005 static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
1006 {
1007 struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
1008 struct net_device *br_indev;
1009
1010 br_indev = nf_bridge_get_physindev(skb, dev_net(skb->dev));
1011 if (!br_indev) {
1012 kfree_skb(skb);
1013 return;
1014 }
1015
1016 skb_pull(skb, ETH_HLEN);
1017 nf_bridge->bridged_dnat = 0;
1018
1019 BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
1020
1021 skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
1022 nf_bridge->neigh_header,
1023 ETH_HLEN - ETH_ALEN);
1024 skb->dev = br_indev;
1025
1026 nf_bridge->physoutdev = NULL;
1027 br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
1028 }
1029
br_nf_dev_xmit(struct sk_buff * skb)1030 static int br_nf_dev_xmit(struct sk_buff *skb)
1031 {
1032 const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
1033
1034 if (nf_bridge && nf_bridge->bridged_dnat) {
1035 br_nf_pre_routing_finish_bridge_slow(skb);
1036 return 1;
1037 }
1038 return 0;
1039 }
1040
/* Ops published through the nf_br_ops pointer by br_netfilter_init();
 * the bridge device xmit hook is served by br_nf_dev_xmit().
 */
static const struct nf_br_ops br_ops = {
	.br_dev_xmit_hook = br_nf_dev_xmit,
};
1044
/* Netfilter hooks registered per network namespace by brnf_device_event()
 * and removed again by brnf_exit_net().
 *
 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * br_dev_queue_push_xmit is called afterwards
 */
static const struct nf_hook_ops br_nf_ops[] = {
	/* Bridge family hooks. */
	{
		.hook = br_nf_pre_routing,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_PRE_ROUTING,
		.priority = NF_BR_PRI_BRNF,
	},
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	/* Only built when conntrack is enabled. */
	{
		.hook = br_nf_local_in,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_LOCAL_IN,
		.priority = NF_BR_PRI_LAST,
	},
#endif
	{
		.hook = br_nf_forward,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_FORWARD,
		.priority = NF_BR_PRI_BRNF,
	},
	{
		.hook = br_nf_post_routing,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_POST_ROUTING,
		.priority = NF_BR_PRI_LAST,
	},
	/* ip_sabotage_in is attached to both the IPv4 and the IPv6
	 * PRE_ROUTING hook, each at that family's FIRST priority.
	 */
	{
		.hook = ip_sabotage_in,
		.pf = NFPROTO_IPV4,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP_PRI_FIRST,
	},
	{
		.hook = ip_sabotage_in,
		.pf = NFPROTO_IPV6,
		.hooknum = NF_INET_PRE_ROUTING,
		.priority = NF_IP6_PRI_FIRST,
	},
};
1087
brnf_device_event(struct notifier_block * unused,unsigned long event,void * ptr)1088 static int brnf_device_event(struct notifier_block *unused, unsigned long event,
1089 void *ptr)
1090 {
1091 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1092 struct brnf_net *brnet;
1093 struct net *net;
1094 int ret;
1095
1096 if (event != NETDEV_REGISTER || !netif_is_bridge_master(dev))
1097 return NOTIFY_DONE;
1098
1099 ASSERT_RTNL();
1100
1101 net = dev_net(dev);
1102 brnet = net_generic(net, brnf_net_id);
1103 if (brnet->enabled)
1104 return NOTIFY_OK;
1105
1106 ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
1107 if (ret)
1108 return NOTIFY_BAD;
1109
1110 brnet->enabled = true;
1111 return NOTIFY_OK;
1112 }
1113
/* Netdevice notifier block; registered in br_netfilter_init() and
 * unregistered in br_netfilter_fini().
 */
static struct notifier_block brnf_notifier __read_mostly = {
	.notifier_call = brnf_device_event,
};
1117
1118 /* recursively invokes nf_hook_slow (again), skipping already-called
1119 * hooks (< NF_BR_PRI_BRNF).
1120 *
1121 * Called with rcu read lock held.
1122 */
br_nf_hook_thresh(unsigned int hook,struct net * net,struct sock * sk,struct sk_buff * skb,struct net_device * indev,struct net_device * outdev,int (* okfn)(struct net *,struct sock *,struct sk_buff *))1123 int br_nf_hook_thresh(unsigned int hook, struct net *net,
1124 struct sock *sk, struct sk_buff *skb,
1125 struct net_device *indev,
1126 struct net_device *outdev,
1127 int (*okfn)(struct net *, struct sock *,
1128 struct sk_buff *))
1129 {
1130 const struct nf_hook_entries *e;
1131 struct nf_hook_state state;
1132 struct nf_hook_ops **ops;
1133 unsigned int i;
1134 int ret;
1135
1136 e = rcu_dereference(net->nf.hooks_bridge[hook]);
1137 if (!e)
1138 return okfn(net, sk, skb);
1139
1140 ops = nf_hook_entries_get_hook_ops(e);
1141 for (i = 0; i < e->num_hook_entries; i++) {
1142 /* These hooks have already been called */
1143 if (ops[i]->priority < NF_BR_PRI_BRNF)
1144 continue;
1145
1146 /* These hooks have not been called yet, run them. */
1147 if (ops[i]->priority > NF_BR_PRI_BRNF)
1148 break;
1149
1150 /* take a closer look at NF_BR_PRI_BRNF. */
1151 if (ops[i]->hook == br_nf_pre_routing) {
1152 /* This hook diverted the skb to this function,
1153 * hooks after this have not been run yet.
1154 */
1155 i++;
1156 break;
1157 }
1158 }
1159
1160 nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
1161 sk, net, okfn);
1162
1163 ret = nf_hook_slow(skb, &state, e, i);
1164 if (ret == 1)
1165 ret = okfn(net, sk, skb);
1166
1167 return ret;
1168 }
1169
1170 #ifdef CONFIG_SYSCTL
1171 static
brnf_sysctl_call_tables(const struct ctl_table * ctl,int write,void * buffer,size_t * lenp,loff_t * ppos)1172 int brnf_sysctl_call_tables(const struct ctl_table *ctl, int write,
1173 void *buffer, size_t *lenp, loff_t *ppos)
1174 {
1175 int ret;
1176
1177 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1178
1179 if (write && *(int *)(ctl->data))
1180 *(int *)(ctl->data) = 1;
1181 return ret;
1182 }
1183
/* sysctl entries registered under "net/bridge".
 *
 * .data is not set here: br_netfilter_sysctl_init_net() points each entry
 * at the matching field of the per-netns struct brnf_net, addressing the
 * entries by index, so the order below must stay in sync with it.
 */
static struct ctl_table brnf_table[] = {
	/* [0] -> brnf_net::call_arptables */
	{
		.procname = "bridge-nf-call-arptables",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = brnf_sysctl_call_tables,
	},
	/* [1] -> brnf_net::call_iptables */
	{
		.procname = "bridge-nf-call-iptables",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = brnf_sysctl_call_tables,
	},
	/* [2] -> brnf_net::call_ip6tables */
	{
		.procname = "bridge-nf-call-ip6tables",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = brnf_sysctl_call_tables,
	},
	/* [3] -> brnf_net::filter_vlan_tagged */
	{
		.procname = "bridge-nf-filter-vlan-tagged",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = brnf_sysctl_call_tables,
	},
	/* [4] -> brnf_net::filter_pppoe_tagged */
	{
		.procname = "bridge-nf-filter-pppoe-tagged",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = brnf_sysctl_call_tables,
	},
	/* [5] -> brnf_net::pass_vlan_indev */
	{
		.procname = "bridge-nf-pass-vlan-input-dev",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = brnf_sysctl_call_tables,
	},
};
1222
br_netfilter_sysctl_default(struct brnf_net * brnf)1223 static inline void br_netfilter_sysctl_default(struct brnf_net *brnf)
1224 {
1225 brnf->call_iptables = 1;
1226 brnf->call_ip6tables = 1;
1227 brnf->call_arptables = 1;
1228 brnf->filter_vlan_tagged = 0;
1229 brnf->filter_pppoe_tagged = 0;
1230 brnf->pass_vlan_indev = 0;
1231 }
1232
br_netfilter_sysctl_init_net(struct net * net)1233 static int br_netfilter_sysctl_init_net(struct net *net)
1234 {
1235 struct ctl_table *table = brnf_table;
1236 struct brnf_net *brnet;
1237
1238 if (!net_eq(net, &init_net)) {
1239 table = kmemdup(table, sizeof(brnf_table), GFP_KERNEL);
1240 if (!table)
1241 return -ENOMEM;
1242 }
1243
1244 brnet = net_generic(net, brnf_net_id);
1245 table[0].data = &brnet->call_arptables;
1246 table[1].data = &brnet->call_iptables;
1247 table[2].data = &brnet->call_ip6tables;
1248 table[3].data = &brnet->filter_vlan_tagged;
1249 table[4].data = &brnet->filter_pppoe_tagged;
1250 table[5].data = &brnet->pass_vlan_indev;
1251
1252 br_netfilter_sysctl_default(brnet);
1253
1254 brnet->ctl_hdr = register_net_sysctl_sz(net, "net/bridge", table,
1255 ARRAY_SIZE(brnf_table));
1256 if (!brnet->ctl_hdr) {
1257 if (!net_eq(net, &init_net))
1258 kfree(table);
1259
1260 return -ENOMEM;
1261 }
1262
1263 return 0;
1264 }
1265
br_netfilter_sysctl_exit_net(struct net * net,struct brnf_net * brnet)1266 static void br_netfilter_sysctl_exit_net(struct net *net,
1267 struct brnf_net *brnet)
1268 {
1269 const struct ctl_table *table = brnet->ctl_hdr->ctl_table_arg;
1270
1271 unregister_net_sysctl_table(brnet->ctl_hdr);
1272 if (!net_eq(net, &init_net))
1273 kfree(table);
1274 }
1275
/* Per-netns init (only built with CONFIG_SYSCTL): sets up the sysctl table
 * for @net and returns br_netfilter_sysctl_init_net()'s result.
 */
static int __net_init brnf_init_net(struct net *net)
{
	return br_netfilter_sysctl_init_net(net);
}
1280 #endif
1281
brnf_exit_net(struct net * net)1282 static void __net_exit brnf_exit_net(struct net *net)
1283 {
1284 struct brnf_net *brnet;
1285
1286 brnet = net_generic(net, brnf_net_id);
1287 if (brnet->enabled) {
1288 nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
1289 brnet->enabled = false;
1290 }
1291
1292 #ifdef CONFIG_SYSCTL
1293 br_netfilter_sysctl_exit_net(net, brnet);
1294 #endif
1295 }
1296
/* Per-network-namespace operations for this module.
 *
 * .init exists only with CONFIG_SYSCTL. .id and .size describe the
 * struct brnf_net that net_generic(net, brnf_net_id) hands back.
 */
static struct pernet_operations brnf_net_ops __read_mostly = {
#ifdef CONFIG_SYSCTL
	.init = brnf_init_net,
#endif
	.exit = brnf_exit_net,
	.id = &brnf_net_id,
	.size = sizeof(struct brnf_net),
};
1305
br_netfilter_init(void)1306 static int __init br_netfilter_init(void)
1307 {
1308 int ret;
1309
1310 ret = register_pernet_subsys(&brnf_net_ops);
1311 if (ret < 0)
1312 return ret;
1313
1314 ret = register_netdevice_notifier(&brnf_notifier);
1315 if (ret < 0) {
1316 unregister_pernet_subsys(&brnf_net_ops);
1317 return ret;
1318 }
1319
1320 RCU_INIT_POINTER(nf_br_ops, &br_ops);
1321 printk(KERN_NOTICE "Bridge firewalling registered\n");
1322 return 0;
1323 }
1324
/* Module exit: undo br_netfilter_init() in reverse order. */
static void __exit br_netfilter_fini(void)
{
	/* Withdraw br_ops first, then drop the notifier and pernet ops. */
	RCU_INIT_POINTER(nf_br_ops, NULL);
	unregister_netdevice_notifier(&brnf_notifier);
	unregister_pernet_subsys(&brnf_net_ops);
}
1331
/* Module entry/exit points. */
module_init(br_netfilter_init);
module_exit(br_netfilter_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>");
MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge");
1339