1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2022 Pablo Neira Ayuso <pablo@netfilter.org>
4 */
5
6 #include <linux/kernel.h>
7 #include <linux/if_vlan.h>
8 #include <linux/init.h>
9 #include <linux/module.h>
10 #include <linux/netlink.h>
11 #include <linux/netfilter.h>
12 #include <linux/netfilter/nf_tables.h>
13 #include <net/netfilter/nf_tables_core.h>
14 #include <net/netfilter/nf_tables.h>
15 #include <net/netfilter/nft_meta.h>
16 #include <net/netfilter/nf_tables_offload.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <net/gre.h>
20 #include <net/geneve.h>
21 #include <net/ip.h>
22 #include <linux/icmpv6.h>
23 #include <linux/ip.h>
24 #include <linux/ipv6.h>
25
26 struct nft_inner_tun_ctx_locked {
27 struct nft_inner_tun_ctx ctx;
28 local_lock_t bh_lock;
29 };
30
31 static DEFINE_PER_CPU(struct nft_inner_tun_ctx_locked, nft_pcpu_tun_ctx) = {
32 .bh_lock = INIT_LOCAL_LOCK(bh_lock),
33 };
34
35 /* Same layout as nft_expr but it embeds the private expression data area. */
36 struct __nft_expr {
37 const struct nft_expr_ops *ops;
38 union {
39 struct nft_payload payload;
40 struct nft_meta meta;
41 } __attribute__((aligned(__alignof__(u64))));
42 };
43
/* Kind of expression wrapped by an inner expression (nft_inner.expr_type). */
enum {
	NFT_INNER_EXPR_PAYLOAD = 0,	/* wrapped expression is "payload" */
	NFT_INNER_EXPR_META,		/* wrapped expression is "meta" */
};
48
49 struct nft_inner {
50 u8 flags;
51 u8 hdrsize;
52 u8 type;
53 u8 expr_type;
54
55 struct __nft_expr expr;
56 };
57
/*
 * Parse the inner link-layer (only when NFT_INNER_LL is set), network and
 * transport headers starting at @off, recording the offsets and the
 * matching NFT_PAYLOAD_CTX_INNER_* flags in @ctx.
 *
 * Without NFT_INNER_LL the network protocol is derived from the IP version
 * nibble found at @off.
 *
 * Returns 0 on success, -1 if a header cannot be fetched from the skb or
 * carries an unsupported/invalid protocol.
 */
static int nft_inner_parse_l2l3(const struct nft_inner *priv,
				const struct nft_pktinfo *pkt,
				struct nft_inner_tun_ctx *ctx, u32 off)
{
	__be16 llproto, outer_llproto;
	u32 nhoff, thoff;

	if (priv->flags & NFT_INNER_LL) {
		struct vlan_ethhdr *vlan, vlan_buf;
		struct ethhdr *eth, eth_buf;
		__be16 proto;
		u32 ll_len;

		eth = skb_header_pointer(pkt->skb, off, sizeof(eth_buf), &eth_buf);
		if (!eth)
			return -1;

		proto = eth->h_proto;
		if (proto == htons(ETH_P_IP) || proto == htons(ETH_P_IPV6)) {
			llproto = proto;
			ll_len = sizeof(eth_buf);
		} else if (proto == htons(ETH_P_8021Q)) {
			/* Re-read the header as a VLAN-tagged ethernet header. */
			vlan = skb_header_pointer(pkt->skb, off, sizeof(vlan_buf),
						  &vlan_buf);
			if (!vlan)
				return -1;

			outer_llproto = vlan->h_vlan_encapsulated_proto;
			llproto = vlan->h_vlan_proto;
			ll_len = sizeof(vlan_buf);
		} else {
			return -1;
		}

		ctx->inner_lloff = off;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_LL;
		off += ll_len;
	} else {
		struct iphdr *iph;
		u32 first_word;

		/* Only the leading 32 bits are fetched to read the version. */
		iph = skb_header_pointer(pkt->skb, off, sizeof(first_word),
					 &first_word);
		if (!iph)
			return -1;

		switch (iph->version) {
		case 4:
			llproto = htons(ETH_P_IP);
			break;
		case 6:
			llproto = htons(ETH_P_IPV6);
			break;
		default:
			return -1;
		}
	}

	/*
	 * The context records the link-layer protocol as seen on the wire;
	 * for VLAN frames the encapsulated protocol selects the L3 parser.
	 */
	ctx->llproto = llproto;
	if (llproto == htons(ETH_P_8021Q))
		llproto = outer_llproto;

	nhoff = off;

	if (llproto == htons(ETH_P_IP)) {
		struct iphdr *ip4, ip4_buf;

		ip4 = skb_header_pointer(pkt->skb, nhoff, sizeof(ip4_buf), &ip4_buf);
		if (!ip4)
			return -1;

		if (ip4->ihl < 5 || ip4->version != 4)
			return -1;

		ctx->inner_nhoff = nhoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH;

		/* Transport header is only recorded when the fragment offset is 0. */
		if ((ntohs(ip4->frag_off) & IP_OFFSET) == 0) {
			thoff = nhoff + (ip4->ihl * 4);
			ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
			ctx->inner_thoff = thoff;
			ctx->l4proto = ip4->protocol;
		}

		return 0;
	}

	if (llproto == htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6, ip6_buf;
		int fh_flags = IP6_FH_F_AUTH;
		unsigned short fragoff;
		int l4proto;

		ip6 = skb_header_pointer(pkt->skb, nhoff, sizeof(ip6_buf), &ip6_buf);
		if (!ip6)
			return -1;

		if (ip6->version != 6)
			return -1;

		ctx->inner_nhoff = nhoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH;

		/* ipv6_find_hdr() updates thoff to the offset of the header it found. */
		thoff = nhoff;
		l4proto = ipv6_find_hdr(pkt->skb, &thoff, -1, &fragoff, &fh_flags);
		if (l4proto < 0 || thoff > U16_MAX)
			return -1;

		if (fragoff == 0) {
			ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
			ctx->inner_thoff = thoff;
			ctx->l4proto = l4proto;
		}

		return 0;
	}

	return -1;
}
178
/*
 * Record the tunnel header offset in @ctx and advance *@off past the
 * tunnel header.
 *
 * For GRE the tunnel offset is the outer transport header offset and *@off
 * is left untouched. For UDP the tunnel header starts at *@off, which is
 * then advanced by priv->hdrsize, plus the geneve options length for
 * NFT_INNER_GENEVE. Any other outer transport protocol is rejected.
 *
 * Returns 0 on success, -1 on failure.
 */
static int nft_inner_parse_tunhdr(const struct nft_inner *priv,
				  const struct nft_pktinfo *pkt,
				  struct nft_inner_tun_ctx *ctx, u32 *off)
{
	switch (pkt->tprot) {
	case IPPROTO_GRE:
		ctx->inner_tunoff = pkt->thoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN;
		return 0;
	case IPPROTO_UDP:
		break;
	default:
		return -1;
	}

	ctx->inner_tunoff = *off;
	ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN;
	*off += priv->hdrsize;

	if (priv->type == NFT_INNER_GENEVE) {
		struct genevehdr *geneve, geneve_buf;

		geneve = skb_header_pointer(pkt->skb, pkt->inneroff,
					    sizeof(geneve_buf), &geneve_buf);
		if (!geneve)
			return -1;

		/* opt_len is scaled by 4 to obtain the options size in bytes. */
		*off += geneve->opt_len * 4;
	}

	return 0;
}
214
nft_inner_parse(const struct nft_inner * priv,struct nft_pktinfo * pkt,struct nft_inner_tun_ctx * tun_ctx)215 static int nft_inner_parse(const struct nft_inner *priv,
216 struct nft_pktinfo *pkt,
217 struct nft_inner_tun_ctx *tun_ctx)
218 {
219 u32 off = pkt->inneroff;
220
221 if (priv->flags & NFT_INNER_HDRSIZE &&
222 nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0)
223 return -1;
224
225 if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) {
226 if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, off) < 0)
227 return -1;
228 } else if (priv->flags & NFT_INNER_TH) {
229 tun_ctx->inner_thoff = off;
230 tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
231 }
232
233 tun_ctx->type = priv->type;
234 tun_ctx->cookie = (unsigned long)pkt->skb;
235 pkt->flags |= NFT_PKTINFO_INNER_FULL;
236
237 return 0;
238 }
239
nft_inner_restore_tun_ctx(const struct nft_pktinfo * pkt,struct nft_inner_tun_ctx * tun_ctx)240 static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt,
241 struct nft_inner_tun_ctx *tun_ctx)
242 {
243 struct nft_inner_tun_ctx *this_cpu_tun_ctx;
244
245 local_bh_disable();
246 local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
247 this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx);
248 if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) {
249 local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
250 local_bh_enable();
251 return false;
252 }
253 *tun_ctx = *this_cpu_tun_ctx;
254 local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
255 local_bh_enable();
256
257 return true;
258 }
259
nft_inner_save_tun_ctx(const struct nft_pktinfo * pkt,const struct nft_inner_tun_ctx * tun_ctx)260 static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt,
261 const struct nft_inner_tun_ctx *tun_ctx)
262 {
263 struct nft_inner_tun_ctx *this_cpu_tun_ctx;
264
265 local_bh_disable();
266 local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
267 this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx);
268 if (this_cpu_tun_ctx->cookie != tun_ctx->cookie)
269 *this_cpu_tun_ctx = *tun_ctx;
270 local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
271 local_bh_enable();
272 }
273
nft_inner_parse_needed(const struct nft_inner * priv,const struct nft_pktinfo * pkt,struct nft_inner_tun_ctx * tun_ctx)274 static bool nft_inner_parse_needed(const struct nft_inner *priv,
275 const struct nft_pktinfo *pkt,
276 struct nft_inner_tun_ctx *tun_ctx)
277 {
278 if (!(pkt->flags & NFT_PKTINFO_INNER_FULL))
279 return true;
280
281 if (!nft_inner_restore_tun_ctx(pkt, tun_ctx))
282 return true;
283
284 if (priv->type != tun_ctx->type)
285 return true;
286
287 return false;
288 }
289
nft_inner_eval(const struct nft_expr * expr,struct nft_regs * regs,const struct nft_pktinfo * pkt)290 static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs,
291 const struct nft_pktinfo *pkt)
292 {
293 const struct nft_inner *priv = nft_expr_priv(expr);
294 struct nft_inner_tun_ctx tun_ctx = {};
295
296 if (nft_payload_inner_offset(pkt) < 0)
297 goto err;
298
299 if (nft_inner_parse_needed(priv, pkt, &tun_ctx) &&
300 nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0)
301 goto err;
302
303 switch (priv->expr_type) {
304 case NFT_INNER_EXPR_PAYLOAD:
305 nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
306 break;
307 case NFT_INNER_EXPR_META:
308 nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
309 break;
310 default:
311 WARN_ON_ONCE(1);
312 goto err;
313 }
314 nft_inner_save_tun_ctx(pkt, &tun_ctx);
315
316 return;
317 err:
318 regs->verdict.code = NFT_BREAK;
319 }
320
321 static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = {
322 [NFTA_INNER_NUM] = { .type = NLA_U32 },
323 [NFTA_INNER_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_INNER_MASK),
324 [NFTA_INNER_HDRSIZE] = { .type = NLA_U32 },
325 [NFTA_INNER_TYPE] = { .type = NLA_U32 },
326 [NFTA_INNER_EXPR] = { .type = NLA_NESTED },
327 };
328
329 struct nft_expr_info {
330 const struct nft_expr_ops *ops;
331 const struct nlattr *attr;
332 struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
333 };
334
nft_inner_init(const struct nft_ctx * ctx,const struct nft_expr * expr,const struct nlattr * const tb[])335 static int nft_inner_init(const struct nft_ctx *ctx,
336 const struct nft_expr *expr,
337 const struct nlattr * const tb[])
338 {
339 struct nft_inner *priv = nft_expr_priv(expr);
340 u32 flags, hdrsize, type, num;
341 struct nft_expr_info expr_info;
342 int err;
343
344 if (!tb[NFTA_INNER_FLAGS] ||
345 !tb[NFTA_INNER_NUM] ||
346 !tb[NFTA_INNER_HDRSIZE] ||
347 !tb[NFTA_INNER_TYPE] ||
348 !tb[NFTA_INNER_EXPR])
349 return -EINVAL;
350
351 flags = ntohl(nla_get_be32(tb[NFTA_INNER_FLAGS]));
352 if (flags & ~NFT_INNER_MASK)
353 return -EOPNOTSUPP;
354
355 num = ntohl(nla_get_be32(tb[NFTA_INNER_NUM]));
356 if (num != 0)
357 return -EOPNOTSUPP;
358
359 hdrsize = ntohl(nla_get_be32(tb[NFTA_INNER_HDRSIZE]));
360 type = ntohl(nla_get_be32(tb[NFTA_INNER_TYPE]));
361
362 if (type > U8_MAX)
363 return -EINVAL;
364
365 if (flags & NFT_INNER_HDRSIZE) {
366 if (hdrsize == 0 || hdrsize > 64)
367 return -EOPNOTSUPP;
368 }
369
370 priv->flags = flags;
371 priv->hdrsize = hdrsize;
372 priv->type = type;
373
374 err = nft_expr_inner_parse(ctx, tb[NFTA_INNER_EXPR], &expr_info);
375 if (err < 0)
376 return err;
377
378 priv->expr.ops = expr_info.ops;
379
380 if (!strcmp(expr_info.ops->type->name, "payload"))
381 priv->expr_type = NFT_INNER_EXPR_PAYLOAD;
382 else if (!strcmp(expr_info.ops->type->name, "meta"))
383 priv->expr_type = NFT_INNER_EXPR_META;
384 else
385 return -EINVAL;
386
387 err = expr_info.ops->init(ctx, (struct nft_expr *)&priv->expr,
388 (const struct nlattr * const*)expr_info.tb);
389 if (err < 0)
390 return err;
391
392 return 0;
393 }
394
/*
 * Dump the inner expression to @skb: num (always 0), type, flags and
 * header size as big-endian 32-bit attributes, followed by the wrapped
 * expression nested under NFTA_INNER_EXPR.
 *
 * Returns 0 on success, -1 if any attribute could not be added.
 */
static int nft_inner_dump(struct sk_buff *skb,
			  const struct nft_expr *expr, bool reset)
{
	const struct nft_inner *priv = nft_expr_priv(expr);

	if (nla_put_be32(skb, NFTA_INNER_NUM, htonl(0)))
		return -1;
	if (nla_put_be32(skb, NFTA_INNER_TYPE, htonl(priv->type)))
		return -1;
	if (nla_put_be32(skb, NFTA_INNER_FLAGS, htonl(priv->flags)))
		return -1;
	if (nla_put_be32(skb, NFTA_INNER_HDRSIZE, htonl(priv->hdrsize)))
		return -1;

	if (nft_expr_dump(skb, NFTA_INNER_EXPR,
			  (struct nft_expr *)&priv->expr, reset) < 0)
		return -1;

	return 0;
}
415
416 static const struct nft_expr_ops nft_inner_ops = {
417 .type = &nft_inner_type,
418 .size = NFT_EXPR_SIZE(sizeof(struct nft_inner)),
419 .eval = nft_inner_eval,
420 .init = nft_inner_init,
421 .dump = nft_inner_dump,
422 };
423
424 struct nft_expr_type nft_inner_type __read_mostly = {
425 .name = "inner",
426 .ops = &nft_inner_ops,
427 .policy = nft_inner_policy,
428 .maxattr = NFTA_INNER_MAX,
429 .owner = THIS_MODULE,
430 };
431