xref: /linux/net/ipv6/netfilter.c (revision 77a6401a8722be20ea8db98ac900c93ccc7068ff)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * IPv6 specific functions of netfilter core
4  *
5  * Rusty Russell (C) 2000
6  * Patrick McHardy (C) 2006-2012
7  */
8 #include <linux/kernel.h>
9 #include <linux/init.h>
10 #include <linux/ipv6.h>
11 #include <linux/netfilter.h>
12 #include <linux/netfilter_ipv6.h>
13 #include <linux/export.h>
14 #include <net/addrconf.h>
15 #include <net/dst.h>
16 #include <net/ipv6.h>
17 #include <net/ip6_route.h>
18 #include <net/xfrm.h>
19 #include <net/netfilter/nf_queue.h>
20 #include <net/netfilter/nf_conntrack_bridge.h>
21 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
22 #include "../bridge/br_private.h"
23 
/* Redo the routing decision for an IPv6 packet whose routing-relevant
 * fields (addresses, mark, flow label) may have been rewritten by a
 * netfilter hook.  On success the skb's dst is replaced with the freshly
 * resolved route (and, under CONFIG_XFRM, possibly an xfrm bundle), and
 * headroom is grown if the new device needs a larger link-layer header.
 * Returns 0 on success or a negative errno.
 */
int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	/* Route on the full socket; sk_partial may be a request/timewait sock. */
	struct sock *sk = sk_to_full_sk(sk_partial);
	struct net_device *dev = skb_dst_dev(skb);
	struct flow_keys flkeys;
	unsigned int hh_len;
	struct dst_entry *dst;
	/* Non-zero when the destination is multicast or link-local, i.e. the
	 * lookup must be pinned to an output interface.
	 */
	int strict = (ipv6_addr_type(&iph->daddr) &
		      (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
	struct flowi6 fl6 = {
		.flowi6_l3mdev = l3mdev_master_ifindex(dev),
		.flowi6_mark = skb->mark,
		.flowi6_uid = sock_net_uid(net, sk),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
	};
	int err;

	/* Prefer the socket's bound device; otherwise fall back to the
	 * current output device when the destination requires one.
	 */
	if (sk && sk->sk_bound_dev_if)
		fl6.flowi6_oif = sk->sk_bound_dev_if;
	else if (strict)
		fl6.flowi6_oif = dev->ifindex;

	fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
	dst = ip6_route_output(net, sk, &fl6);
	err = dst->error;
	if (err) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
		net_dbg_ratelimited("ip6_route_me_harder: No more route\n");
		dst_release(dst);
		return err;
	}

	/* Drop old route. */
	skb_dst_drop(skb);

	skb_dst_set(skb, dst);

#ifdef CONFIG_XFRM
	/* If an IPsec policy matches and the skb was not already transformed,
	 * replace the plain route with the dst returned by xfrm_lookup().
	 */
	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
	    xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
		/* ignore return value from skb_dstref_steal, xfrm_lookup takes
		 * care of dropping the refcnt if needed.
		 */
		skb_dstref_steal(skb);
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
		if (IS_ERR(dst))
			return PTR_ERR(dst);
		skb_dst_set(skb, dst);
	}
#endif

	/* Change in oif may mean change in hh_len. */
	hh_len = skb_dst_dev(skb)->hard_header_len;
	if (skb_headroom(skb) < hh_len &&
	    pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
			     0, GFP_ATOMIC))
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL(ip6_route_me_harder);
88 
89 static int nf_ip6_reroute(struct sk_buff *skb,
90 			  const struct nf_queue_entry *entry)
91 {
92 	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
93 
94 	if (entry->state.hook == NF_INET_LOCAL_OUT) {
95 		const struct ipv6hdr *iph = ipv6_hdr(skb);
96 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
97 		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
98 		    skb->mark != rt_info->mark)
99 			return ip6_route_me_harder(entry->state.net, entry->state.sk, skb);
100 	}
101 	return 0;
102 }
103 
104 int __nf_ip6_route(struct net *net, struct dst_entry **dst,
105 		   struct flowi *fl, bool strict)
106 {
107 	static const struct ipv6_pinfo fake_pinfo;
108 	static const struct inet_sock fake_sk = {
109 		/* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */
110 		.sk.sk_bound_dev_if = 1,
111 		.pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
112 	};
113 	const void *sk = strict ? &fake_sk : NULL;
114 	struct dst_entry *result;
115 	int err;
116 
117 	result = ip6_route_output(net, sk, &fl->u.ip6);
118 	err = result->error;
119 	if (err)
120 		dst_release(result);
121 	else
122 		*dst = result;
123 	return err;
124 }
125 EXPORT_SYMBOL_GPL(__nf_ip6_route);
126 
/* Fragment an over-sized IPv6 skb on the bridge path and hand each
 * fragment to @output.  Fragment size is bounded by the conntrack-recorded
 * frag_max_size, not the device MTU.  Consumes @skb on every path: it is
 * either freed after successful fragmentation, passed on to @output, or
 * dropped ("blackholed") on unrecoverable errors.  Returns 0 or the first
 * error from @output.
 */
int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		    struct nf_bridge_frag_data *data,
		    int (*output)(struct net *, struct sock *sk,
				  const struct nf_bridge_frag_data *data,
				  struct sk_buff *))
{
	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
	/* Preserve the original delivery time across fragment skbs. */
	u8 tstamp_type = skb->tstamp_type;
	ktime_t tstamp = skb->tstamp;
	struct ip6_frag_state state;
	u8 *prevhdr, nexthdr = 0;
	unsigned int mtu, hlen;
	int hroom, err = 0;
	__be32 frag_id;

	/* Locate the insertion point for the fragment header; on success
	 * err holds the unfragmentable-part length.
	 */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto blackhole;
	hlen = err;
	nexthdr = *prevhdr;

	/* Sanity-check the recorded fragment size against device MTU and
	 * the IPv6 minimum MTU.
	 */
	mtu = skb->dev->mtu;
	if (frag_max_size > mtu ||
	    frag_max_size < IPV6_MIN_MTU)
		goto blackhole;

	mtu = frag_max_size;
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto blackhole;
	/* mtu now = payload budget per fragment (excl. headers). */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Checksum must be resolved before the payload is split up. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto blackhole;

	hroom = LL_RESERVED_SPACE(skb->dev);
	/* Fast path: reuse an existing frag list as the fragments, if every
	 * entry fits and has room for the headers; else fall to slow path.
	 */
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu)
			goto blackhole;

		if (skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag2) {
			if (frag2->len > mtu)
				goto blackhole;

			/* Partially cloned skb? */
			if (skb_shared(frag2) ||
			    skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto blackhole;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, tstamp_type);
			err = output(net, sk, data, skb);
			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);
		if (!err)
			return 0;

		/* Free the not-yet-transmitted remainder of the list. */
		kfree_skb_list(iter.frag);
		return err;
	}
slow_path:
	/* This is a linearized skbuff, the original geometry is lost for us.
	 * This may also be a clone skbuff, we could preserve the geometry for
	 * the copies but probably not worth the effort.
	 */
	ip6_frag_init(skb, hlen, mtu, skb->dev->needed_tailroom,
		      LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id,
		      &state);

	/* Allocate and emit one fresh fragment skb at a time. */
	while (state.left > 0) {
		struct sk_buff *skb2;

		skb2 = ip6_frag_next(skb, &state);
		if (IS_ERR(skb2)) {
			err = PTR_ERR(skb2);
			goto blackhole;
		}

		skb_set_delivery_time(skb2, tstamp, tstamp_type);
		err = output(net, sk, data, skb2);
		if (err)
			goto blackhole;
	}
	consume_skb(skb);
	return err;

blackhole:
	/* Unrecoverable: drop the packet silently (return 0). */
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(br_ip6_fragment);
246 
/* Indirection table published via the nf_ipv6_ops RCU pointer.  The
 * IS_MODULE(CONFIG_IPV6) entries are only provided when IPv6 is built as
 * a module; presumably built-in users call those symbols directly in that
 * case — verify against the nf_ipv6_ops callers.
 */
static const struct nf_ipv6_ops ipv6ops = {
#if IS_MODULE(CONFIG_IPV6)
	.chk_addr		= ipv6_chk_addr,
	.route_me_harder	= ip6_route_me_harder,
	.dev_get_saddr		= ipv6_dev_get_saddr,
	.route			= __nf_ip6_route,
#if IS_ENABLED(CONFIG_SYN_COOKIES)
	.cookie_init_sequence	= __cookie_v6_init_sequence,
	.cookie_v6_check	= __cookie_v6_check,
#endif
#endif
	.route_input		= ip6_route_input,
	.fragment		= ip6_fragment,
	.reroute		= nf_ip6_reroute,
#if IS_MODULE(CONFIG_IPV6)
	.br_fragment		= br_ip6_fragment,
#endif
};
265 
/* Publish the IPv6 ops table for netfilter core users; always succeeds. */
int __init ipv6_netfilter_init(void)
{
	RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
	return 0;
}
271 
272 /* This can be called from inet6_init() on errors, so it cannot
273  * be marked __exit. -DaveM
274  */
/* Unpublish the IPv6 ops table; readers see NULL after a grace period. */
void ipv6_netfilter_fini(void)
{
	RCU_INIT_POINTER(nf_ipv6_ops, NULL);
}
279