xref: /linux/net/ipv6/seg6_local.c (revision 90e0d94d369d342e735a75174439482119b6c393)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *  SR-IPv6 implementation
4  *
5  *  Authors:
6  *  David Lebrun <david.lebrun@uclouvain.be>
7  *  eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
8  */
9 
10 #include <linux/filter.h>
11 #include <linux/types.h>
12 #include <linux/skbuff.h>
13 #include <linux/net.h>
14 #include <linux/module.h>
15 #include <net/ip.h>
16 #include <net/lwtunnel.h>
17 #include <net/netevent.h>
18 #include <net/netns/generic.h>
19 #include <net/ip6_fib.h>
20 #include <net/route.h>
21 #include <net/seg6.h>
22 #include <linux/seg6.h>
23 #include <linux/seg6_local.h>
24 #include <net/addrconf.h>
25 #include <net/ip6_route.h>
26 #include <net/dst_cache.h>
27 #include <net/ip_tunnels.h>
28 #ifdef CONFIG_IPV6_SEG6_HMAC
29 #include <net/seg6_hmac.h>
30 #endif
31 #include <net/seg6_local.h>
32 #include <linux/etherdevice.h>
33 #include <linux/bpf.h>
34 #include <linux/netfilter.h>
35 
/* Bitmask with only the bit of seg6local attribute index @i set. It is the
 * unit used by the attrs/optattrs/parsed_optattrs bitmasks in this file.
 */
#define SEG6_F_ATTR(i)		BIT(i)
37 
struct seg6_local_lwt;

/* callbacks used for customizing the creation and destruction of a behavior */
struct seg6_local_lwtunnel_ops {
	/* Behavior-specific setup of @slwt starting from the opaque
	 * configuration @cfg. The builders defined in this file return 0 on
	 * success or a negative errno, and report details through @extack.
	 */
	int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
			   struct netlink_ext_ack *extack);
	/* Behavior-specific teardown of @slwt. */
	void (*destroy_state)(struct seg6_local_lwt *slwt);
};
46 
/* Static description of one SRv6 behavior; instances live in
 * seg6_action_table and are looked up by __get_action_desc().
 */
struct seg6_action_desc {
	/* SEG6_LOCAL_ACTION_* identifier of the behavior */
	int action;
	/* bitmask (SEG6_F_ATTR bits) of the required attributes */
	unsigned long attrs;

	/* The optattrs field is used for specifying all the optional
	 * attributes supported by a specific behavior.
	 * It means that if one of these attributes is not provided in the
	 * netlink message during the behavior creation, no errors will be
	 * returned to the userspace.
	 *
	 * Each attribute can be only of two types (mutually exclusive):
	 * 1) required or 2) optional.
	 * Every user MUST obey to this rule! If you set an attribute as
	 * required the same attribute CANNOT be set as optional and vice
	 * versa.
	 */
	unsigned long optattrs;

	/* packet handler of the behavior; may be left NULL (see the End.DT4
	 * and End.DT46 entries when CONFIG_NET_L3_MASTER_DEV is disabled)
	 */
	int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	/* fixed amount of headroom declared by the behavior; only the
	 * End.B6.Encap entry sets it (to the size of an IPv6 header)
	 */
	int static_headroom;

	/* optional per-behavior build/destroy callbacks */
	struct seg6_local_lwtunnel_ops slwt_ops;
};
70 
/* eBPF program attached to an End.BPF behavior instance. */
struct bpf_lwt_prog {
	/* program executed by input_action_end_bpf() */
	struct bpf_prog *prog;
	/* NOTE(review): presumably the user-supplied program name; its users
	 * are not visible in this part of the file - confirm.
	 */
	char *name;
};
75 
/* default length values (expressed in bits) for both Locator-Block and
 * Locator-Node Function.
 *
 * Both SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS *must* be:
 *    i) greater than 0;
 *   ii) evenly divisible by 8. In other terms, the lengths of the
 *	 Locator-Block and Locator-Node Function must be byte-aligned (we can
 *	 relax this constraint in the future if really needed).
 *
 * Moreover, a third condition must hold:
 *  iii) SEG6_LOCAL_LCBLOCK_DBITS + SEG6_LOCAL_LCNODE_FN_DBITS <= 128.
 *
 * The correctness of the SEG6_LOCAL_LCBLOCK_DBITS and
 * SEG6_LOCAL_LCNODE_FN_DBITS values is checked during the kernel compilation.
 * If the compilation stops, check the value of these parameters to see if
 * they meet conditions (i), (ii) and (iii).
 */
#define SEG6_LOCAL_LCBLOCK_DBITS	32
#define SEG6_LOCAL_LCNODE_FN_DBITS	16
95 
/* The following next_csid_chk_{cntr,lcblock,lcnode_fn}_bits macros can be
 * used directly to check whether the lengths (in bits) of Locator-Block and
 * Locator-Node Function are valid according to (i), (ii), (iii).
 *
 * Each macro evaluates to a non-zero value when the checked length is
 * INVALID and to zero when it is acceptable.
 */

/* non-zero if the two lengths together exceed the 128 bits of an address */
#define next_csid_chk_cntr_bits(blen, flen)		\
	((blen) + (flen) > 128)

/* non-zero if the length is zero, greater than 120 or not a multiple of 8;
 * the argument is evaluated only once.
 */
#define next_csid_chk_lcblock_bits(blen)		\
({							\
	typeof(blen) __tmp = blen;			\
	(!__tmp || __tmp > 120 || (__tmp & 0x07));	\
})

/* the Locator-Node Function length obeys the same rules as the Locator-Block */
#define next_csid_chk_lcnode_fn_bits(flen)		\
	next_csid_chk_lcblock_bits(flen)
111 
/* Supported Flavor operations are reported in this bitmask; at present only
 * the NEXT-C-SID operation is listed.
 */
#define SEG6_LOCAL_FLV_SUPP_OPS	(BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID))
114 
/* Flavors configuration of a behavior instance. */
struct seg6_flavors_info {
	/* Flavor operations (bitmask of BIT(SEG6_LOCAL_FLV_OP_*) values) */
	__u32 flv_ops;

	/* Locator-Block length, expressed in bits */
	__u8 lcblock_bits;
	/* Locator-Node Function length, expressed in bits */
	__u8 lcnode_func_bits;
};
124 
/* Operating mode of an End.DT* behavior instance. */
enum seg6_end_dt_mode {
	/* neither or both of the table/vrftable attributes were supplied */
	DT_INVALID_MODE	= -EINVAL,
	/* routing table given through the SEG6_LOCAL_TABLE attribute */
	DT_LEGACY_MODE	= 0,
	/* routing done through the VRF bound to SEG6_LOCAL_VRFTABLE */
	DT_VRF_MODE	= 1,
};
130 
/* Per-instance state of the End.DT4/DT6/DT46 behaviors. */
struct seg6_end_dt_info {
	/* legacy (table) or VRF based operation */
	enum seg6_end_dt_mode mode;

	/* network namespace the behavior was configured in; packets coming
	 * from a device of another namespace are refused by
	 * end_dt_get_vrf_rcu().
	 */
	struct net *net;
	/* VRF device associated to the routing table used by the SRv6
	 * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
	 */
	int vrf_ifindex;
	int vrf_table;

	/* tunneled packet family (IPv4 or IPv6).
	 * Protocol and header length are inferred from family.
	 * AF_UNSPEC (End.DT46) accepts both families.
	 */
	u16 family;
};
146 
/* Per-CPU counters of a behavior instance. */
struct pcpu_seg6_local_counters {
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t errors;

	/* NOTE(review): presumably serializes updates and reads of the three
	 * counters above; the update path is not fully visible here - confirm.
	 */
	struct u64_stats_sync syncp;
};
154 
/* This struct groups all the SRv6 Behavior counters supported so far.
 *
 * put_nla_counters() makes use of this data structure to collect all counter
 * values after the per-CPU counter evaluation has been performed.
 * Finally, each counter value (in seg6_local_counters) is stored in the
 * corresponding netlink attribute and sent to user space.
 *
 * The fields mirror, one by one, those of struct pcpu_seg6_local_counters.
 *
 * NB: we don't want to expose this structure to user space!
 */
struct seg6_local_counters {
	__u64 packets;
	__u64 bytes;
	__u64 errors;
};
169 
/* Allocate the per-CPU counters of a behavior with the @__gfp flags;
 * __GFP_ZERO is always added to the caller's flags.
 */
#define seg6_local_alloc_pcpu_counters(__gfp)				\
	__netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters,	\
				  ((__gfp) | __GFP_ZERO))
173 
/* attribute bit of the (optional) counters attribute */
#define SEG6_F_LOCAL_COUNTERS	SEG6_F_ATTR(SEG6_LOCAL_COUNTERS)
175 
/* Private state of a seg6local lightweight tunnel, i.e. one configured
 * instance of an SRv6 behavior. It is retrieved from a lwtunnel_state through
 * seg6_local_lwtunnel().
 */
struct seg6_local_lwt {
	/* SEG6_LOCAL_ACTION_* of this instance */
	int action;
	/* SRH applied by the End.B6 and End.B6.Encap behaviors */
	struct ipv6_sr_hdr *srh;
	/* routing table used by End.T and by End.DT6 in legacy mode */
	int table;
	/* IPv4 nexthop of End.DX4; 0 means "use the inner packet DA" */
	struct in_addr nh4;
	/* IPv6 nexthop of End.X and End.DX6 (for the latter, :: means "use
	 * the inner packet DA")
	 */
	struct in6_addr nh6;
	int iif;
	/* egress interface index used by End.DX2 */
	int oif;
	/* program run by End.BPF */
	struct bpf_lwt_prog bpf;
#ifdef CONFIG_NET_L3_MASTER_DEV
	/* state of the End.DT4/DT6/DT46 behaviors */
	struct seg6_end_dt_info dt_info;
#endif
	/* flavors configuration, consulted by the End behavior */
	struct seg6_flavors_info flv_info;

	struct pcpu_seg6_local_counters __percpu *pcpu_counters;

	int headroom;
	/* description of the behavior this instance implements */
	struct seg6_action_desc *desc;
	/* unlike the required attrs, we have to track the optional attributes
	 * that have been effectively parsed.
	 */
	unsigned long parsed_optattrs;
};
199 
200 static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
201 {
202 	return (struct seg6_local_lwt *)lwt->data;
203 }
204 
205 static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
206 {
207 	struct ipv6_sr_hdr *srh;
208 
209 	srh = seg6_get_srh(skb, IP6_FH_F_SKIP_RH);
210 	if (!srh)
211 		return NULL;
212 
213 #ifdef CONFIG_IPV6_SEG6_HMAC
214 	if (!seg6_hmac_validate_skb(skb))
215 		return NULL;
216 #endif
217 
218 	return srh;
219 }
220 
221 static bool decap_and_validate(struct sk_buff *skb, int proto)
222 {
223 	struct ipv6_sr_hdr *srh;
224 	unsigned int off = 0;
225 
226 	srh = seg6_get_srh(skb, 0);
227 	if (srh && srh->segments_left > 0)
228 		return false;
229 
230 #ifdef CONFIG_IPV6_SEG6_HMAC
231 	if (srh && !seg6_hmac_validate_skb(skb))
232 		return false;
233 #endif
234 
235 	if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
236 		return false;
237 
238 	if (!pskb_pull(skb, off))
239 		return false;
240 
241 	skb_postpull_rcsum(skb, skb_network_header(skb), off);
242 
243 	skb_reset_network_header(skb);
244 	skb_reset_transport_header(skb);
245 	if (iptunnel_pull_offloads(skb))
246 		return false;
247 
248 	return true;
249 }
250 
251 static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
252 {
253 	struct in6_addr *addr;
254 
255 	srh->segments_left--;
256 	addr = srh->segments + srh->segments_left;
257 	*daddr = *addr;
258 }
259 
/* Route @skb and attach the resulting dst to it.
 *
 * @skb: packet to route; the flow key is built from its IPv6 header, its
 *	 mark and its input device
 * @nhaddr: if not NULL, used as flow destination in place of the packet DA
 *	    (FLOWI_FLAG_KNOWN_NH is set as well)
 * @tbl_id: id of the routing table to look up; 0 selects the regular input
 *	    route lookup
 * @local_delivery: if false, a non-error route whose device has IFF_LOOPBACK
 *		    set is discarded
 *
 * When no usable route is available, the blackhole entry of the netns is
 * attached instead. Returns the error field of the dst set on @skb.
 */
static int
seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
			u32 tbl_id, bool local_delivery)
{
	struct net *net = dev_net(skb->dev);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct dst_entry *dst = NULL;
	struct rt6_info *rt;
	struct flowi6 fl6;
	int dev_flags = 0;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = skb->dev->ifindex;
	fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
	fl6.saddr = hdr->saddr;
	fl6.flowlabel = ip6_flowinfo(hdr);
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = hdr->nexthdr;

	if (nhaddr)
		fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;

	if (!tbl_id) {
		dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
	} else {
		struct fib6_table *table;

		table = fib6_get_table(net, tbl_id);
		if (!table)
			/* unknown table: fall back to the blackhole entry */
			goto out;

		rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
		dst = &rt->dst;
	}

	/* we want to discard traffic destined for local packet processing,
	 * if @local_delivery is set to false.
	 */
	if (!local_delivery)
		dev_flags |= IFF_LOOPBACK;

	/* error routes are kept, so that their error is reported below */
	if (dst && (dst->dev->flags & dev_flags) && !dst->error) {
		dst_release(dst);
		dst = NULL;
	}

out:
	if (!dst) {
		rt = net->ipv6.ip6_blk_hole_entry;
		dst = &rt->dst;
		dst_hold(dst);
	}

	/* replace whatever dst the skb was carrying with the new one */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);
	return dst->error;
}
318 
319 int seg6_lookup_nexthop(struct sk_buff *skb,
320 			struct in6_addr *nhaddr, u32 tbl_id)
321 {
322 	return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false);
323 }
324 
325 static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo)
326 {
327 	return finfo->lcblock_bits >> 3;
328 }
329 
330 static __u8 seg6_flv_lcnode_func_octects(const struct seg6_flavors_info *finfo)
331 {
332 	return finfo->lcnode_func_bits >> 3;
333 }
334 
335 static bool seg6_next_csid_is_arg_zero(const struct in6_addr *addr,
336 				       const struct seg6_flavors_info *finfo)
337 {
338 	__u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
339 	__u8 blk_octects = seg6_flv_lcblock_octects(finfo);
340 	__u8 arg_octects;
341 	int i;
342 
343 	arg_octects = 16 - blk_octects - fnc_octects;
344 	for (i = 0; i < arg_octects; ++i) {
345 		if (addr->s6_addr[blk_octects + fnc_octects + i] != 0x00)
346 			return false;
347 	}
348 
349 	return true;
350 }
351 
352 /* assume that DA.Argument length > 0 */
353 static void seg6_next_csid_advance_arg(struct in6_addr *addr,
354 				       const struct seg6_flavors_info *finfo)
355 {
356 	__u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
357 	__u8 blk_octects = seg6_flv_lcblock_octects(finfo);
358 
359 	/* advance DA.Argument */
360 	memmove(&addr->s6_addr[blk_octects],
361 		&addr->s6_addr[blk_octects + fnc_octects],
362 		16 - blk_octects - fnc_octects);
363 
364 	memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects);
365 }
366 
367 static int input_action_end_core(struct sk_buff *skb,
368 				 struct seg6_local_lwt *slwt)
369 {
370 	struct ipv6_sr_hdr *srh;
371 
372 	srh = get_and_validate_srh(skb);
373 	if (!srh)
374 		goto drop;
375 
376 	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
377 
378 	seg6_lookup_nexthop(skb, NULL, 0);
379 
380 	return dst_input(skb);
381 
382 drop:
383 	kfree_skb(skb);
384 	return -EINVAL;
385 }
386 
387 static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
388 {
389 	const struct seg6_flavors_info *finfo = &slwt->flv_info;
390 	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
391 
392 	if (seg6_next_csid_is_arg_zero(daddr, finfo))
393 		return input_action_end_core(skb, slwt);
394 
395 	/* update DA */
396 	seg6_next_csid_advance_arg(daddr, finfo);
397 
398 	seg6_lookup_nexthop(skb, NULL, 0);
399 
400 	return dst_input(skb);
401 }
402 
403 static bool seg6_next_csid_enabled(__u32 fops)
404 {
405 	return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID);
406 }
407 
408 /* regular endpoint function */
409 static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
410 {
411 	const struct seg6_flavors_info *finfo = &slwt->flv_info;
412 
413 	if (seg6_next_csid_enabled(finfo->flv_ops))
414 		return end_next_csid_core(skb, slwt);
415 
416 	return input_action_end_core(skb, slwt);
417 }
418 
419 /* regular endpoint, and forward to specified nexthop */
420 static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
421 {
422 	struct ipv6_sr_hdr *srh;
423 
424 	srh = get_and_validate_srh(skb);
425 	if (!srh)
426 		goto drop;
427 
428 	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
429 
430 	seg6_lookup_nexthop(skb, &slwt->nh6, 0);
431 
432 	return dst_input(skb);
433 
434 drop:
435 	kfree_skb(skb);
436 	return -EINVAL;
437 }
438 
439 static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
440 {
441 	struct ipv6_sr_hdr *srh;
442 
443 	srh = get_and_validate_srh(skb);
444 	if (!srh)
445 		goto drop;
446 
447 	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
448 
449 	seg6_lookup_nexthop(skb, NULL, slwt->table);
450 
451 	return dst_input(skb);
452 
453 drop:
454 	kfree_skb(skb);
455 	return -EINVAL;
456 }
457 
/* decapsulate and forward inner L2 frame on specified interface
 *
 * The inner Ethernet frame is transmitted, as is, on the device whose
 * ifindex is slwt->oif.
 *
 * Returns the result of dev_queue_xmit(), or -EINVAL (the skb is freed) if
 * the packet cannot be decapsulated or the egress device is not suitable.
 */
static int input_action_end_dx2(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *odev;
	struct ethhdr *eth;

	if (!decap_and_validate(skb, IPPROTO_ETHERNET))
		goto drop;

	/* the whole Ethernet header is accessed below */
	if (!pskb_may_pull(skb, ETH_HLEN))
		goto drop;

	skb_reset_mac_header(skb);
	eth = (struct ethhdr *)skb->data;

	/* To determine the frame's protocol, we assume it is 802.3. This avoids
	 * a call to eth_type_trans(), which is not really relevant for our
	 * use case.
	 */
	if (!eth_proto_is_802_3(eth->h_proto))
		goto drop;

	/* NOTE(review): no reference is taken on odev; presumably the caller
	 * runs inside an RCU read-side section - confirm.
	 */
	odev = dev_get_by_index_rcu(net, slwt->oif);
	if (!odev)
		goto drop;

	/* As we accept Ethernet frames, make sure the egress device is of
	 * the correct type.
	 */
	if (odev->type != ARPHRD_ETHER)
		goto drop;

	if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
		goto drop;

	skb_orphan(skb);

	if (skb_warn_if_lro(skb))
		goto drop;

	skb_forward_csum(skb);

	/* the frame payload (Ethernet header excluded) must fit the MTU */
	if (skb->len - ETH_HLEN > odev->mtu)
		goto drop;

	skb->dev = odev;
	skb->protocol = eth->h_proto;

	return dev_queue_xmit(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
514 
515 static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
516 				       struct sk_buff *skb)
517 {
518 	struct dst_entry *orig_dst = skb_dst(skb);
519 	struct in6_addr *nhaddr = NULL;
520 	struct seg6_local_lwt *slwt;
521 
522 	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
523 
524 	/* The inner packet is not associated to any local interface,
525 	 * so we do not call netif_rx().
526 	 *
527 	 * If slwt->nh6 is set to ::, then lookup the nexthop for the
528 	 * inner packet's DA. Otherwise, use the specified nexthop.
529 	 */
530 	if (!ipv6_addr_any(&slwt->nh6))
531 		nhaddr = &slwt->nh6;
532 
533 	seg6_lookup_nexthop(skb, nhaddr, 0);
534 
535 	return dst_input(skb);
536 }
537 
/* decapsulate and forward to specified nexthop
 *
 * Returns the result of the netfilter hook or of
 * input_action_end_dx6_finish(), or -EINVAL (the skb is freed) when the inner
 * IPv6 packet cannot be extracted.
 */
static int input_action_end_dx6(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	/* this function accepts IPv6 encapsulated packets, with either
	 * an SRH with SL=0, or no SRH.
	 */

	if (!decap_and_validate(skb, IPPROTO_IPV6))
		goto drop;

	/* make sure the whole inner IPv6 header is in the linear area */
	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto drop;

	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
	nf_reset_ct(skb);

	/* when the lwtunnel netfilter hooks are enabled, the inner packet goes
	 * through PRE_ROUTING before being routed by the _finish function
	 */
	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
			       dev_net(skb->dev), NULL, skb, NULL,
			       skb_dst(skb)->dev, input_action_end_dx6_finish);

	return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
565 
566 static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
567 				       struct sk_buff *skb)
568 {
569 	struct dst_entry *orig_dst = skb_dst(skb);
570 	struct seg6_local_lwt *slwt;
571 	struct iphdr *iph;
572 	__be32 nhaddr;
573 	int err;
574 
575 	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
576 
577 	iph = ip_hdr(skb);
578 
579 	nhaddr = slwt->nh4.s_addr ?: iph->daddr;
580 
581 	skb_dst_drop(skb);
582 
583 	err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
584 	if (err) {
585 		kfree_skb(skb);
586 		return -EINVAL;
587 	}
588 
589 	return dst_input(skb);
590 }
591 
/* decapsulate the inner IPv4 packet and forward it to the specified nexthop
 *
 * Returns the result of the netfilter hook or of
 * input_action_end_dx4_finish(), or -EINVAL (the skb is freed) when the inner
 * IPv4 packet cannot be extracted.
 */
static int input_action_end_dx4(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	if (!decap_and_validate(skb, IPPROTO_IPIP))
		goto drop;

	/* make sure the inner IPv4 header is in the linear area */
	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto drop;

	/* from now on the skb carries an IPv4 packet */
	skb->protocol = htons(ETH_P_IP);
	skb_set_transport_header(skb, sizeof(struct iphdr));
	nf_reset_ct(skb);

	/* when the lwtunnel netfilter hooks are enabled, the inner packet goes
	 * through PRE_ROUTING before being routed by the _finish function
	 */
	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
			       dev_net(skb->dev), NULL, skb, NULL,
			       skb_dst(skb)->dev, input_action_end_dx4_finish);

	return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
615 
616 #ifdef CONFIG_NET_L3_MASTER_DEV
617 static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
618 {
619 	const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
620 
621 	return nli->nl_net;
622 }
623 
624 static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
625 				   u16 family, struct netlink_ext_ack *extack)
626 {
627 	struct seg6_end_dt_info *info = &slwt->dt_info;
628 	int vrf_ifindex;
629 	struct net *net;
630 
631 	net = fib6_config_get_net(cfg);
632 
633 	/* note that vrf_table was already set by parse_nla_vrftable() */
634 	vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
635 							info->vrf_table);
636 	if (vrf_ifindex < 0) {
637 		if (vrf_ifindex == -EPERM) {
638 			NL_SET_ERR_MSG(extack,
639 				       "Strict mode for VRF is disabled");
640 		} else if (vrf_ifindex == -ENODEV) {
641 			NL_SET_ERR_MSG(extack,
642 				       "Table has no associated VRF device");
643 		} else {
644 			pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
645 				 vrf_ifindex);
646 		}
647 
648 		return vrf_ifindex;
649 	}
650 
651 	info->net = net;
652 	info->vrf_ifindex = vrf_ifindex;
653 
654 	info->family = family;
655 	info->mode = DT_VRF_MODE;
656 
657 	return 0;
658 }
659 
/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
 * routes the IPv4/IPv6 packet by looking at the configured routing table.
 *
 * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
 * Routing Header packets) from several interfaces and the outer IPv6
 * destination address (DA) is used for retrieving the specific instance of the
 * End.DT4/DT6 behavior that should process the packets.
 *
 * However, the inner IPv4/IPv6 packet is not really bound to any receiving
 * interface and thus the End.DT4/DT6 sets the VRF (associated with the
 * corresponding routing table) as the *receiving* interface.
 * In other words, the End.DT4/DT6 processes a packet as if it has been received
 * directly by the VRF (and not by one of its slave devices, if any).
 * In this way, the VRF interface is used for routing the IPv4/IPv6 packet
 * according to the routing table configured by the End.DT4/DT6 instance.
 *
 * This design allows you to get some interesting features like:
 *  1) the statistics on rx packets;
 *  2) the possibility to install a packet sniffer on the receiving interface
 *     (the VRF one) for looking at the incoming packets;
 *  3) the possibility to leverage the netfilter prerouting hook for the inner
 *     IPv4 packet.
 *
 * This function returns:
 *  - the sk_buff* when the VRF rcv handler has processed the packet correctly;
 *  - NULL when the skb is consumed by the VRF rcv handler;
 *  - a pointer which encodes a negative error number in case of error.
 *    Note that in this case, the function takes care of freeing the skb.
 */
static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
				      struct net_device *dev)
{
	/* based on l3mdev_ip_rcv; we are only interested in the master */
	if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
		goto drop;

	/* the device must provide the l3 receive handler invoked below */
	if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
		goto drop;

	/* the decap packet IPv4/IPv6 does not come with any mac header info.
	 * We must unset the mac header to allow the VRF device to rebuild it,
	 * just in case there is a sniffer attached on the device.
	 */
	skb_unset_mac_header(skb);

	skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
	if (!skb)
		/* the skb buffer was consumed by the handler */
		return NULL;

	/* when a packet is received by a VRF or by one of its slaves, the
	 * master device reference is set into the skb.
	 */
	if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
		goto drop;

	return skb;

drop:
	kfree_skb(skb);
	return ERR_PTR(-EINVAL);
}
722 
723 static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
724 					     struct seg6_end_dt_info *info)
725 {
726 	int vrf_ifindex = info->vrf_ifindex;
727 	struct net *net = info->net;
728 
729 	if (unlikely(vrf_ifindex < 0))
730 		goto error;
731 
732 	if (unlikely(!net_eq(dev_net(skb->dev), net)))
733 		goto error;
734 
735 	return dev_get_by_index_rcu(net, vrf_ifindex);
736 
737 error:
738 	return NULL;
739 }
740 
741 static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
742 				       struct seg6_local_lwt *slwt, u16 family)
743 {
744 	struct seg6_end_dt_info *info = &slwt->dt_info;
745 	struct net_device *vrf;
746 	__be16 protocol;
747 	int hdrlen;
748 
749 	vrf = end_dt_get_vrf_rcu(skb, info);
750 	if (unlikely(!vrf))
751 		goto drop;
752 
753 	switch (family) {
754 	case AF_INET:
755 		protocol = htons(ETH_P_IP);
756 		hdrlen = sizeof(struct iphdr);
757 		break;
758 	case AF_INET6:
759 		protocol = htons(ETH_P_IPV6);
760 		hdrlen = sizeof(struct ipv6hdr);
761 		break;
762 	case AF_UNSPEC:
763 		fallthrough;
764 	default:
765 		goto drop;
766 	}
767 
768 	if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
769 		pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
770 		goto drop;
771 	}
772 
773 	skb->protocol = protocol;
774 
775 	skb_dst_drop(skb);
776 
777 	skb_set_transport_header(skb, hdrlen);
778 	nf_reset_ct(skb);
779 
780 	return end_dt_vrf_rcv(skb, family, vrf);
781 
782 drop:
783 	kfree_skb(skb);
784 	return ERR_PTR(-EINVAL);
785 }
786 
/* End.DT4: decapsulate the inner IPv4 packet and route it through the VRF
 * bound to the configured table.
 *
 * Returns 0 if the packet was consumed by the VRF, the result of
 * dst_input() when the packet is routed, or a negative errno on failure (the
 * skb is freed in that case).
 */
static int input_action_end_dt4(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct iphdr *iph;
	int err;

	if (!decap_and_validate(skb, IPPROTO_IPIP))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto drop;

	skb = end_dt_vrf_core(skb, slwt, AF_INET);
	if (!skb)
		/* packet has been processed and consumed by the VRF */
		return 0;

	/* on error the skb was already freed by end_dt_vrf_core() */
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	iph = ip_hdr(skb);

	/* at this point skb->dev is the VRF device (see end_dt_vrf_rcv()) */
	err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
	if (unlikely(err))
		goto drop;

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
819 
820 static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
821 			      struct netlink_ext_ack *extack)
822 {
823 	return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
824 }
825 
826 static enum
827 seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
828 {
829 	unsigned long parsed_optattrs = slwt->parsed_optattrs;
830 	bool legacy, vrfmode;
831 
832 	legacy	= !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE));
833 	vrfmode	= !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE));
834 
835 	if (!(legacy ^ vrfmode))
836 		/* both are absent or present: invalid DT6 mode */
837 		return DT_INVALID_MODE;
838 
839 	return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
840 }
841 
842 static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
843 {
844 	struct seg6_end_dt_info *info = &slwt->dt_info;
845 
846 	return info->mode;
847 }
848 
849 static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
850 			      struct netlink_ext_ack *extack)
851 {
852 	enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
853 	struct seg6_end_dt_info *info = &slwt->dt_info;
854 
855 	switch (mode) {
856 	case DT_LEGACY_MODE:
857 		info->mode = DT_LEGACY_MODE;
858 		return 0;
859 	case DT_VRF_MODE:
860 		return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
861 	default:
862 		NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
863 		return -EINVAL;
864 	}
865 }
866 #endif
867 
/* End.DT6: decapsulate the inner IPv6 packet and route it either through the
 * VRF (VRF mode, only with CONFIG_NET_L3_MASTER_DEV) or in the configured
 * table (legacy mode). Unlike the other behaviors, local delivery of the
 * inner packet is allowed.
 *
 * Returns 0 if the packet was consumed by the VRF, the result of
 * dst_input() when the packet is routed, or a negative errno on failure (the
 * skb is freed in that case).
 */
static int input_action_end_dt6(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	if (!decap_and_validate(skb, IPPROTO_IPV6))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto drop;

#ifdef CONFIG_NET_L3_MASTER_DEV
	if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
		goto legacy_mode;

	/* DT6_VRF_MODE */
	skb = end_dt_vrf_core(skb, slwt, AF_INET6);
	if (!skb)
		/* packet has been processed and consumed by the VRF */
		return 0;

	/* on error the skb was already freed by end_dt_vrf_core() */
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* note: this time we do not need to specify the table because the VRF
	 * takes care of selecting the correct table.
	 */
	seg6_lookup_any_nexthop(skb, NULL, 0, true);

	return dst_input(skb);

legacy_mode:
#endif
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
909 
910 #ifdef CONFIG_NET_L3_MASTER_DEV
911 static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
912 			       struct netlink_ext_ack *extack)
913 {
914 	return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
915 }
916 
917 static int input_action_end_dt46(struct sk_buff *skb,
918 				 struct seg6_local_lwt *slwt)
919 {
920 	unsigned int off = 0;
921 	int nexthdr;
922 
923 	nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
924 	if (unlikely(nexthdr < 0))
925 		goto drop;
926 
927 	switch (nexthdr) {
928 	case IPPROTO_IPIP:
929 		return input_action_end_dt4(skb, slwt);
930 	case IPPROTO_IPV6:
931 		return input_action_end_dt6(skb, slwt);
932 	}
933 
934 drop:
935 	kfree_skb(skb);
936 	return -EINVAL;
937 }
938 #endif
939 
940 /* push an SRH on top of the current one */
941 static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
942 {
943 	struct ipv6_sr_hdr *srh;
944 	int err = -EINVAL;
945 
946 	srh = get_and_validate_srh(skb);
947 	if (!srh)
948 		goto drop;
949 
950 	err = seg6_do_srh_inline(skb, slwt->srh);
951 	if (err)
952 		goto drop;
953 
954 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
955 
956 	seg6_lookup_nexthop(skb, NULL, 0);
957 
958 	return dst_input(skb);
959 
960 drop:
961 	kfree_skb(skb);
962 	return err;
963 }
964 
965 /* encapsulate within an outer IPv6 header and a specified SRH */
966 static int input_action_end_b6_encap(struct sk_buff *skb,
967 				     struct seg6_local_lwt *slwt)
968 {
969 	struct ipv6_sr_hdr *srh;
970 	int err = -EINVAL;
971 
972 	srh = get_and_validate_srh(skb);
973 	if (!srh)
974 		goto drop;
975 
976 	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
977 
978 	skb_reset_inner_headers(skb);
979 	skb->encapsulation = 1;
980 
981 	err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
982 	if (err)
983 		goto drop;
984 
985 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
986 
987 	seg6_lookup_nexthop(skb, NULL, 0);
988 
989 	return dst_input(skb);
990 
991 drop:
992 	kfree_skb(skb);
993 	return err;
994 }
995 
/* Per-CPU state describing the SRH of the packet being processed by End.BPF.
 * It is filled in by input_action_end_bpf() before the program runs and
 * checked by seg6_bpf_has_valid_srh().
 */
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
997 
998 bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
999 {
1000 	struct seg6_bpf_srh_state *srh_state =
1001 		this_cpu_ptr(&seg6_bpf_srh_states);
1002 	struct ipv6_sr_hdr *srh = srh_state->srh;
1003 
1004 	if (unlikely(srh == NULL))
1005 		return false;
1006 
1007 	if (unlikely(!srh_state->valid)) {
1008 		if ((srh_state->hdrlen & 7) != 0)
1009 			return false;
1010 
1011 		srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
1012 		if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
1013 			return false;
1014 
1015 		srh_state->valid = true;
1016 	}
1017 
1018 	return true;
1019 }
1020 
1021 static int input_action_end_bpf(struct sk_buff *skb,
1022 				struct seg6_local_lwt *slwt)
1023 {
1024 	struct seg6_bpf_srh_state *srh_state =
1025 		this_cpu_ptr(&seg6_bpf_srh_states);
1026 	struct ipv6_sr_hdr *srh;
1027 	int ret;
1028 
1029 	srh = get_and_validate_srh(skb);
1030 	if (!srh) {
1031 		kfree_skb(skb);
1032 		return -EINVAL;
1033 	}
1034 	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
1035 
1036 	/* preempt_disable is needed to protect the per-CPU buffer srh_state,
1037 	 * which is also accessed by the bpf_lwt_seg6_* helpers
1038 	 */
1039 	preempt_disable();
1040 	srh_state->srh = srh;
1041 	srh_state->hdrlen = srh->hdrlen << 3;
1042 	srh_state->valid = true;
1043 
1044 	rcu_read_lock();
1045 	bpf_compute_data_pointers(skb);
1046 	ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
1047 	rcu_read_unlock();
1048 
1049 	switch (ret) {
1050 	case BPF_OK:
1051 	case BPF_REDIRECT:
1052 		break;
1053 	case BPF_DROP:
1054 		goto drop;
1055 	default:
1056 		pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
1057 		goto drop;
1058 	}
1059 
1060 	if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
1061 		goto drop;
1062 
1063 	preempt_enable();
1064 	if (ret != BPF_REDIRECT)
1065 		seg6_lookup_nexthop(skb, NULL, 0);
1066 
1067 	return dst_input(skb);
1068 
1069 drop:
1070 	preempt_enable();
1071 	kfree_skb(skb);
1072 	return -EINVAL;
1073 }
1074 
/* Table of the SRv6 behaviors known to this file, one entry per
 * SEG6_LOCAL_ACTION_*; it is scanned by __get_action_desc().
 *
 * For each behavior the table lists the required (attrs) and optional
 * (optattrs) attributes, the packet handler and, if any, the build callbacks.
 * The End.DT4 and End.DT46 entries get no handler when
 * CONFIG_NET_L3_MASTER_DEV is disabled.
 */
static struct seg6_action_desc seg6_action_table[] = {
	{
		.action		= SEG6_LOCAL_ACTION_END,
		.attrs		= 0,
		.optattrs	= SEG6_F_LOCAL_COUNTERS |
				  SEG6_F_ATTR(SEG6_LOCAL_FLAVORS),
		.input		= input_action_end,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_X,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH6),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_x,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_T,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_TABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_t,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX2,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_OIF),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx2,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX6,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH6),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX4,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH4),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx4,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT4,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.input		= input_action_end_dt4,
		.slwt_ops	= {
					.build_state = seg6_end_dt4_build,
				  },
#endif
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT6,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.attrs		= 0,
		.optattrs	= SEG6_F_LOCAL_COUNTERS		|
				  SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
				  SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.slwt_ops	= {
					.build_state = seg6_end_dt6_build,
				  },
#else
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_TABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#endif
		.input		= input_action_end_dt6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT46,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.input		= input_action_end_dt46,
		.slwt_ops	= {
					.build_state = seg6_end_dt46_build,
				  },
#endif
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_B6,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_b6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_B6_ENCAP,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_b6_encap,
		.static_headroom	= sizeof(struct ipv6hdr),
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_BPF,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_BPF),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_bpf,
	},

};
1172 
1173 static struct seg6_action_desc *__get_action_desc(int action)
1174 {
1175 	struct seg6_action_desc *desc;
1176 	int i, count;
1177 
1178 	count = ARRAY_SIZE(seg6_action_table);
1179 	for (i = 0; i < count; i++) {
1180 		desc = &seg6_action_table[i];
1181 		if (desc->action == action)
1182 			return desc;
1183 	}
1184 
1185 	return NULL;
1186 }
1187 
1188 static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt)
1189 {
1190 	return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS;
1191 }
1192 
1193 static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
1194 				       unsigned int len, int err)
1195 {
1196 	struct pcpu_seg6_local_counters *pcounters;
1197 
1198 	pcounters = this_cpu_ptr(slwt->pcpu_counters);
1199 	u64_stats_update_begin(&pcounters->syncp);
1200 
1201 	if (likely(!err)) {
1202 		u64_stats_inc(&pcounters->packets);
1203 		u64_stats_add(&pcounters->bytes, len);
1204 	} else {
1205 		u64_stats_inc(&pcounters->errors);
1206 	}
1207 
1208 	u64_stats_update_end(&pcounters->syncp);
1209 }
1210 
1211 static int seg6_local_input_core(struct net *net, struct sock *sk,
1212 				 struct sk_buff *skb)
1213 {
1214 	struct dst_entry *orig_dst = skb_dst(skb);
1215 	struct seg6_action_desc *desc;
1216 	struct seg6_local_lwt *slwt;
1217 	unsigned int len = skb->len;
1218 	int rc;
1219 
1220 	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
1221 	desc = slwt->desc;
1222 
1223 	rc = desc->input(skb, slwt);
1224 
1225 	if (!seg6_lwtunnel_counters_enabled(slwt))
1226 		return rc;
1227 
1228 	seg6_local_update_counters(slwt, len, rc);
1229 
1230 	return rc;
1231 }
1232 
1233 static int seg6_local_input(struct sk_buff *skb)
1234 {
1235 	if (skb->protocol != htons(ETH_P_IPV6)) {
1236 		kfree_skb(skb);
1237 		return -EINVAL;
1238 	}
1239 
1240 	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
1241 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
1242 			       dev_net(skb->dev), NULL, skb, skb->dev, NULL,
1243 			       seg6_local_input_core);
1244 
1245 	return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
1246 }
1247 
1248 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
1249 	[SEG6_LOCAL_ACTION]	= { .type = NLA_U32 },
1250 	[SEG6_LOCAL_SRH]	= { .type = NLA_BINARY },
1251 	[SEG6_LOCAL_TABLE]	= { .type = NLA_U32 },
1252 	[SEG6_LOCAL_VRFTABLE]	= { .type = NLA_U32 },
1253 	[SEG6_LOCAL_NH4]	= { .type = NLA_BINARY,
1254 				    .len = sizeof(struct in_addr) },
1255 	[SEG6_LOCAL_NH6]	= { .type = NLA_BINARY,
1256 				    .len = sizeof(struct in6_addr) },
1257 	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
1258 	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
1259 	[SEG6_LOCAL_BPF]	= { .type = NLA_NESTED },
1260 	[SEG6_LOCAL_COUNTERS]	= { .type = NLA_NESTED },
1261 	[SEG6_LOCAL_FLAVORS]	= { .type = NLA_NESTED },
1262 };
1263 
1264 static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1265 			 struct netlink_ext_ack *extack)
1266 {
1267 	struct ipv6_sr_hdr *srh;
1268 	int len;
1269 
1270 	srh = nla_data(attrs[SEG6_LOCAL_SRH]);
1271 	len = nla_len(attrs[SEG6_LOCAL_SRH]);
1272 
1273 	/* SRH must contain at least one segment */
1274 	if (len < sizeof(*srh) + sizeof(struct in6_addr))
1275 		return -EINVAL;
1276 
1277 	if (!seg6_validate_srh(srh, len, false))
1278 		return -EINVAL;
1279 
1280 	slwt->srh = kmemdup(srh, len, GFP_KERNEL);
1281 	if (!slwt->srh)
1282 		return -ENOMEM;
1283 
1284 	slwt->headroom += len;
1285 
1286 	return 0;
1287 }
1288 
1289 static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1290 {
1291 	struct ipv6_sr_hdr *srh;
1292 	struct nlattr *nla;
1293 	int len;
1294 
1295 	srh = slwt->srh;
1296 	len = (srh->hdrlen + 1) << 3;
1297 
1298 	nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
1299 	if (!nla)
1300 		return -EMSGSIZE;
1301 
1302 	memcpy(nla_data(nla), srh, len);
1303 
1304 	return 0;
1305 }
1306 
1307 static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1308 {
1309 	int len = (a->srh->hdrlen + 1) << 3;
1310 
1311 	if (len != ((b->srh->hdrlen + 1) << 3))
1312 		return 1;
1313 
1314 	return memcmp(a->srh, b->srh, len);
1315 }
1316 
1317 static void destroy_attr_srh(struct seg6_local_lwt *slwt)
1318 {
1319 	kfree(slwt->srh);
1320 }
1321 
1322 static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1323 			   struct netlink_ext_ack *extack)
1324 {
1325 	slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
1326 
1327 	return 0;
1328 }
1329 
1330 static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1331 {
1332 	if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
1333 		return -EMSGSIZE;
1334 
1335 	return 0;
1336 }
1337 
1338 static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1339 {
1340 	if (a->table != b->table)
1341 		return 1;
1342 
1343 	return 0;
1344 }
1345 
1346 static struct
1347 seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
1348 {
1349 #ifdef CONFIG_NET_L3_MASTER_DEV
1350 	return &slwt->dt_info;
1351 #else
1352 	return ERR_PTR(-EOPNOTSUPP);
1353 #endif
1354 }
1355 
1356 static int parse_nla_vrftable(struct nlattr **attrs,
1357 			      struct seg6_local_lwt *slwt,
1358 			      struct netlink_ext_ack *extack)
1359 {
1360 	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
1361 
1362 	if (IS_ERR(info))
1363 		return PTR_ERR(info);
1364 
1365 	info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);
1366 
1367 	return 0;
1368 }
1369 
1370 static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1371 {
1372 	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
1373 
1374 	if (IS_ERR(info))
1375 		return PTR_ERR(info);
1376 
1377 	if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
1378 		return -EMSGSIZE;
1379 
1380 	return 0;
1381 }
1382 
1383 static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1384 {
1385 	struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
1386 	struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);
1387 
1388 	if (info_a->vrf_table != info_b->vrf_table)
1389 		return 1;
1390 
1391 	return 0;
1392 }
1393 
1394 static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1395 			 struct netlink_ext_ack *extack)
1396 {
1397 	memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
1398 	       sizeof(struct in_addr));
1399 
1400 	return 0;
1401 }
1402 
1403 static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1404 {
1405 	struct nlattr *nla;
1406 
1407 	nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
1408 	if (!nla)
1409 		return -EMSGSIZE;
1410 
1411 	memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
1412 
1413 	return 0;
1414 }
1415 
1416 static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1417 {
1418 	return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
1419 }
1420 
1421 static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1422 			 struct netlink_ext_ack *extack)
1423 {
1424 	memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
1425 	       sizeof(struct in6_addr));
1426 
1427 	return 0;
1428 }
1429 
1430 static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1431 {
1432 	struct nlattr *nla;
1433 
1434 	nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
1435 	if (!nla)
1436 		return -EMSGSIZE;
1437 
1438 	memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
1439 
1440 	return 0;
1441 }
1442 
1443 static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1444 {
1445 	return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
1446 }
1447 
1448 static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1449 			 struct netlink_ext_ack *extack)
1450 {
1451 	slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
1452 
1453 	return 0;
1454 }
1455 
1456 static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1457 {
1458 	if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
1459 		return -EMSGSIZE;
1460 
1461 	return 0;
1462 }
1463 
1464 static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1465 {
1466 	if (a->iif != b->iif)
1467 		return 1;
1468 
1469 	return 0;
1470 }
1471 
1472 static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1473 			 struct netlink_ext_ack *extack)
1474 {
1475 	slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
1476 
1477 	return 0;
1478 }
1479 
1480 static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1481 {
1482 	if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
1483 		return -EMSGSIZE;
1484 
1485 	return 0;
1486 }
1487 
1488 static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1489 {
1490 	if (a->oif != b->oif)
1491 		return 1;
1492 
1493 	return 0;
1494 }
1495 
1496 #define MAX_PROG_NAME 256
1497 static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
1498 	[SEG6_LOCAL_BPF_PROG]	   = { .type = NLA_U32, },
1499 	[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
1500 				       .len = MAX_PROG_NAME },
1501 };
1502 
1503 static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1504 			 struct netlink_ext_ack *extack)
1505 {
1506 	struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
1507 	struct bpf_prog *p;
1508 	int ret;
1509 	u32 fd;
1510 
1511 	ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
1512 					  attrs[SEG6_LOCAL_BPF],
1513 					  bpf_prog_policy, NULL);
1514 	if (ret < 0)
1515 		return ret;
1516 
1517 	if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
1518 		return -EINVAL;
1519 
1520 	slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
1521 	if (!slwt->bpf.name)
1522 		return -ENOMEM;
1523 
1524 	fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
1525 	p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
1526 	if (IS_ERR(p)) {
1527 		kfree(slwt->bpf.name);
1528 		return PTR_ERR(p);
1529 	}
1530 
1531 	slwt->bpf.prog = p;
1532 	return 0;
1533 }
1534 
1535 static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1536 {
1537 	struct nlattr *nest;
1538 
1539 	if (!slwt->bpf.prog)
1540 		return 0;
1541 
1542 	nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
1543 	if (!nest)
1544 		return -EMSGSIZE;
1545 
1546 	if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
1547 		return -EMSGSIZE;
1548 
1549 	if (slwt->bpf.name &&
1550 	    nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
1551 		return -EMSGSIZE;
1552 
1553 	return nla_nest_end(skb, nest);
1554 }
1555 
1556 static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1557 {
1558 	if (!a->bpf.name && !b->bpf.name)
1559 		return 0;
1560 
1561 	if (!a->bpf.name || !b->bpf.name)
1562 		return 1;
1563 
1564 	return strcmp(a->bpf.name, b->bpf.name);
1565 }
1566 
1567 static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
1568 {
1569 	kfree(slwt->bpf.name);
1570 	if (slwt->bpf.prog)
1571 		bpf_prog_put(slwt->bpf.prog);
1572 }
1573 
1574 static const struct
1575 nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = {
1576 	[SEG6_LOCAL_CNT_PACKETS]	= { .type = NLA_U64 },
1577 	[SEG6_LOCAL_CNT_BYTES]		= { .type = NLA_U64 },
1578 	[SEG6_LOCAL_CNT_ERRORS]		= { .type = NLA_U64 },
1579 };
1580 
1581 static int parse_nla_counters(struct nlattr **attrs,
1582 			      struct seg6_local_lwt *slwt,
1583 			      struct netlink_ext_ack *extack)
1584 {
1585 	struct pcpu_seg6_local_counters __percpu *pcounters;
1586 	struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1];
1587 	int ret;
1588 
1589 	ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX,
1590 					  attrs[SEG6_LOCAL_COUNTERS],
1591 					  seg6_local_counters_policy, NULL);
1592 	if (ret < 0)
1593 		return ret;
1594 
1595 	/* basic support for SRv6 Behavior counters requires at least:
1596 	 * packets, bytes and errors.
1597 	 */
1598 	if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] ||
1599 	    !tb[SEG6_LOCAL_CNT_ERRORS])
1600 		return -EINVAL;
1601 
1602 	/* counters are always zero initialized */
1603 	pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL);
1604 	if (!pcounters)
1605 		return -ENOMEM;
1606 
1607 	slwt->pcpu_counters = pcounters;
1608 
1609 	return 0;
1610 }
1611 
1612 static int seg6_local_fill_nla_counters(struct sk_buff *skb,
1613 					struct seg6_local_counters *counters)
1614 {
1615 	if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets,
1616 			      SEG6_LOCAL_CNT_PAD))
1617 		return -EMSGSIZE;
1618 
1619 	if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes,
1620 			      SEG6_LOCAL_CNT_PAD))
1621 		return -EMSGSIZE;
1622 
1623 	if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors,
1624 			      SEG6_LOCAL_CNT_PAD))
1625 		return -EMSGSIZE;
1626 
1627 	return 0;
1628 }
1629 
1630 static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1631 {
1632 	struct seg6_local_counters counters = { 0, 0, 0 };
1633 	struct nlattr *nest;
1634 	int rc, i;
1635 
1636 	nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS);
1637 	if (!nest)
1638 		return -EMSGSIZE;
1639 
1640 	for_each_possible_cpu(i) {
1641 		struct pcpu_seg6_local_counters *pcounters;
1642 		u64 packets, bytes, errors;
1643 		unsigned int start;
1644 
1645 		pcounters = per_cpu_ptr(slwt->pcpu_counters, i);
1646 		do {
1647 			start = u64_stats_fetch_begin_irq(&pcounters->syncp);
1648 
1649 			packets = u64_stats_read(&pcounters->packets);
1650 			bytes = u64_stats_read(&pcounters->bytes);
1651 			errors = u64_stats_read(&pcounters->errors);
1652 
1653 		} while (u64_stats_fetch_retry_irq(&pcounters->syncp, start));
1654 
1655 		counters.packets += packets;
1656 		counters.bytes += bytes;
1657 		counters.errors += errors;
1658 	}
1659 
1660 	rc = seg6_local_fill_nla_counters(skb, &counters);
1661 	if (rc < 0) {
1662 		nla_nest_cancel(skb, nest);
1663 		return rc;
1664 	}
1665 
1666 	return nla_nest_end(skb, nest);
1667 }
1668 
1669 static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1670 {
1671 	/* a and b are equal if both have pcpu_counters set or not */
1672 	return (!!((unsigned long)a->pcpu_counters)) ^
1673 		(!!((unsigned long)b->pcpu_counters));
1674 }
1675 
1676 static void destroy_attr_counters(struct seg6_local_lwt *slwt)
1677 {
1678 	free_percpu(slwt->pcpu_counters);
1679 }
1680 
1681 static const
1682 struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = {
1683 	[SEG6_LOCAL_FLV_OPERATION]	= { .type = NLA_U32 },
1684 	[SEG6_LOCAL_FLV_LCBLOCK_BITS]	= { .type = NLA_U8 },
1685 	[SEG6_LOCAL_FLV_LCNODE_FN_BITS]	= { .type = NLA_U8 },
1686 };
1687 
1688 /* check whether the lengths of the Locator-Block and Locator-Node Function
1689  * are compatible with the dimension of a C-SID container.
1690  */
1691 static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len)
1692 {
1693 	/* Locator-Block and Locator-Node Function cannot exceed 128 bits
1694 	 * (i.e. C-SID container lenghts).
1695 	 */
1696 	if (next_csid_chk_cntr_bits(block_len, func_len))
1697 		return -EINVAL;
1698 
1699 	/* Locator-Block length must be greater than zero and evenly divisible
1700 	 * by 8. There must be room for a Locator-Node Function, at least.
1701 	 */
1702 	if (next_csid_chk_lcblock_bits(block_len))
1703 		return -EINVAL;
1704 
1705 	/* Locator-Node Function length must be greater than zero and evenly
1706 	 * divisible by 8. There must be room for the Locator-Block.
1707 	 */
1708 	if (next_csid_chk_lcnode_fn_bits(func_len))
1709 		return -EINVAL;
1710 
1711 	return 0;
1712 }
1713 
1714 static int seg6_parse_nla_next_csid_cfg(struct nlattr **tb,
1715 					struct seg6_flavors_info *finfo,
1716 					struct netlink_ext_ack *extack)
1717 {
1718 	__u8 func_len = SEG6_LOCAL_LCNODE_FN_DBITS;
1719 	__u8 block_len = SEG6_LOCAL_LCBLOCK_DBITS;
1720 	int rc;
1721 
1722 	if (tb[SEG6_LOCAL_FLV_LCBLOCK_BITS])
1723 		block_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCBLOCK_BITS]);
1724 
1725 	if (tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS])
1726 		func_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS]);
1727 
1728 	rc = seg6_chk_next_csid_cfg(block_len, func_len);
1729 	if (rc < 0) {
1730 		NL_SET_ERR_MSG(extack,
1731 			       "Invalid Locator Block/Node Function lengths");
1732 		return rc;
1733 	}
1734 
1735 	finfo->lcblock_bits = block_len;
1736 	finfo->lcnode_func_bits = func_len;
1737 
1738 	return 0;
1739 }
1740 
1741 static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
1742 			     struct netlink_ext_ack *extack)
1743 {
1744 	struct seg6_flavors_info *finfo = &slwt->flv_info;
1745 	struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1];
1746 	unsigned long fops;
1747 	int rc;
1748 
1749 	rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX,
1750 					 attrs[SEG6_LOCAL_FLAVORS],
1751 					 seg6_local_flavors_policy, NULL);
1752 	if (rc < 0)
1753 		return rc;
1754 
1755 	/* this attribute MUST always be present since it represents the Flavor
1756 	 * operation(s) to be carried out.
1757 	 */
1758 	if (!tb[SEG6_LOCAL_FLV_OPERATION])
1759 		return -EINVAL;
1760 
1761 	fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]);
1762 	if (fops & ~SEG6_LOCAL_FLV_SUPP_OPS) {
1763 		NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)");
1764 		return -EOPNOTSUPP;
1765 	}
1766 
1767 	finfo->flv_ops = fops;
1768 
1769 	if (seg6_next_csid_enabled(fops)) {
1770 		/* Locator-Block and Locator-Node Function lengths can be
1771 		 * provided by the user space. Otherwise, default values are
1772 		 * applied.
1773 		 */
1774 		rc = seg6_parse_nla_next_csid_cfg(tb, finfo, extack);
1775 		if (rc < 0)
1776 			return rc;
1777 	}
1778 
1779 	return 0;
1780 }
1781 
1782 static int seg6_fill_nla_next_csid_cfg(struct sk_buff *skb,
1783 				       struct seg6_flavors_info *finfo)
1784 {
1785 	if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCBLOCK_BITS, finfo->lcblock_bits))
1786 		return -EMSGSIZE;
1787 
1788 	if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCNODE_FN_BITS,
1789 		       finfo->lcnode_func_bits))
1790 		return -EMSGSIZE;
1791 
1792 	return 0;
1793 }
1794 
1795 static int put_nla_flavors(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1796 {
1797 	struct seg6_flavors_info *finfo = &slwt->flv_info;
1798 	__u32 fops = finfo->flv_ops;
1799 	struct nlattr *nest;
1800 	int rc;
1801 
1802 	nest = nla_nest_start(skb, SEG6_LOCAL_FLAVORS);
1803 	if (!nest)
1804 		return -EMSGSIZE;
1805 
1806 	if (nla_put_u32(skb, SEG6_LOCAL_FLV_OPERATION, fops)) {
1807 		rc = -EMSGSIZE;
1808 		goto err;
1809 	}
1810 
1811 	if (seg6_next_csid_enabled(fops)) {
1812 		rc = seg6_fill_nla_next_csid_cfg(skb, finfo);
1813 		if (rc < 0)
1814 			goto err;
1815 	}
1816 
1817 	return nla_nest_end(skb, nest);
1818 
1819 err:
1820 	nla_nest_cancel(skb, nest);
1821 	return rc;
1822 }
1823 
1824 static int seg6_cmp_nla_next_csid_cfg(struct seg6_flavors_info *finfo_a,
1825 				      struct seg6_flavors_info *finfo_b)
1826 {
1827 	if (finfo_a->lcblock_bits != finfo_b->lcblock_bits)
1828 		return 1;
1829 
1830 	if (finfo_a->lcnode_func_bits != finfo_b->lcnode_func_bits)
1831 		return 1;
1832 
1833 	return 0;
1834 }
1835 
1836 static int cmp_nla_flavors(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1837 {
1838 	struct seg6_flavors_info *finfo_a = &a->flv_info;
1839 	struct seg6_flavors_info *finfo_b = &b->flv_info;
1840 
1841 	if (finfo_a->flv_ops != finfo_b->flv_ops)
1842 		return 1;
1843 
1844 	if (seg6_next_csid_enabled(finfo_a->flv_ops)) {
1845 		if (seg6_cmp_nla_next_csid_cfg(finfo_a, finfo_b))
1846 			return 1;
1847 	}
1848 
1849 	return 0;
1850 }
1851 
1852 static int encap_size_flavors(struct seg6_local_lwt *slwt)
1853 {
1854 	struct seg6_flavors_info *finfo = &slwt->flv_info;
1855 	int nlsize;
1856 
1857 	nlsize = nla_total_size(0) +	/* nest SEG6_LOCAL_FLAVORS */
1858 		 nla_total_size(4);	/* SEG6_LOCAL_FLV_OPERATION */
1859 
1860 	if (seg6_next_csid_enabled(finfo->flv_ops))
1861 		nlsize += nla_total_size(1) + /* SEG6_LOCAL_FLV_LCBLOCK_BITS */
1862 			  nla_total_size(1); /* SEG6_LOCAL_FLV_LCNODE_FN_BITS */
1863 
1864 	return nlsize;
1865 }
1866 
/* Per-attribute callbacks, indexed by attribute ID in seg6_action_params. */
struct seg6_action_param {
	/* read the attribute from @attrs into @slwt; a negative return value
	 * is treated as a failure by the callers in this file.
	 */
	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt,
		     struct netlink_ext_ack *extack);
	/* dump the attribute of @slwt into @skb; negative on failure */
	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	/* compare the attribute of @a and @b; non-zero means they differ */
	int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);

	/* optional destroy() callback useful for releasing resources which
	 * have been previously acquired in the corresponding parse()
	 * function.
	 */
	void (*destroy)(struct seg6_local_lwt *slwt);
};
1879 
1880 static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
1881 	[SEG6_LOCAL_SRH]	= { .parse = parse_nla_srh,
1882 				    .put = put_nla_srh,
1883 				    .cmp = cmp_nla_srh,
1884 				    .destroy = destroy_attr_srh },
1885 
1886 	[SEG6_LOCAL_TABLE]	= { .parse = parse_nla_table,
1887 				    .put = put_nla_table,
1888 				    .cmp = cmp_nla_table },
1889 
1890 	[SEG6_LOCAL_NH4]	= { .parse = parse_nla_nh4,
1891 				    .put = put_nla_nh4,
1892 				    .cmp = cmp_nla_nh4 },
1893 
1894 	[SEG6_LOCAL_NH6]	= { .parse = parse_nla_nh6,
1895 				    .put = put_nla_nh6,
1896 				    .cmp = cmp_nla_nh6 },
1897 
1898 	[SEG6_LOCAL_IIF]	= { .parse = parse_nla_iif,
1899 				    .put = put_nla_iif,
1900 				    .cmp = cmp_nla_iif },
1901 
1902 	[SEG6_LOCAL_OIF]	= { .parse = parse_nla_oif,
1903 				    .put = put_nla_oif,
1904 				    .cmp = cmp_nla_oif },
1905 
1906 	[SEG6_LOCAL_BPF]	= { .parse = parse_nla_bpf,
1907 				    .put = put_nla_bpf,
1908 				    .cmp = cmp_nla_bpf,
1909 				    .destroy = destroy_attr_bpf },
1910 
1911 	[SEG6_LOCAL_VRFTABLE]	= { .parse = parse_nla_vrftable,
1912 				    .put = put_nla_vrftable,
1913 				    .cmp = cmp_nla_vrftable },
1914 
1915 	[SEG6_LOCAL_COUNTERS]	= { .parse = parse_nla_counters,
1916 				    .put = put_nla_counters,
1917 				    .cmp = cmp_nla_counters,
1918 				    .destroy = destroy_attr_counters },
1919 
1920 	[SEG6_LOCAL_FLAVORS]	= { .parse = parse_nla_flavors,
1921 				    .put = put_nla_flavors,
1922 				    .cmp = cmp_nla_flavors },
1923 };
1924 
1925 /* call the destroy() callback (if available) for each set attribute in
1926  * @parsed_attrs, starting from the first attribute up to the @max_parsed
1927  * (excluded) attribute.
1928  */
1929 static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
1930 			    struct seg6_local_lwt *slwt)
1931 {
1932 	struct seg6_action_param *param;
1933 	int i;
1934 
1935 	/* Every required seg6local attribute is identified by an ID which is
1936 	 * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
1937 	 *
1938 	 * We scan the 'parsed_attrs' bitmask, starting from the first attribute
1939 	 * up to the @max_parsed (excluded) attribute.
1940 	 * For each set attribute, we retrieve the corresponding destroy()
1941 	 * callback. If the callback is not available, then we skip to the next
1942 	 * attribute; otherwise, we call the destroy() callback.
1943 	 */
1944 	for (i = SEG6_LOCAL_SRH; i < max_parsed; ++i) {
1945 		if (!(parsed_attrs & SEG6_F_ATTR(i)))
1946 			continue;
1947 
1948 		param = &seg6_action_params[i];
1949 
1950 		if (param->destroy)
1951 			param->destroy(slwt);
1952 	}
1953 }
1954 
1955 /* release all the resources that may have been acquired during parsing
1956  * operations.
1957  */
1958 static void destroy_attrs(struct seg6_local_lwt *slwt)
1959 {
1960 	unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;
1961 
1962 	__destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
1963 }
1964 
1965 static int parse_nla_optional_attrs(struct nlattr **attrs,
1966 				    struct seg6_local_lwt *slwt,
1967 				    struct netlink_ext_ack *extack)
1968 {
1969 	struct seg6_action_desc *desc = slwt->desc;
1970 	unsigned long parsed_optattrs = 0;
1971 	struct seg6_action_param *param;
1972 	int err, i;
1973 
1974 	for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; ++i) {
1975 		if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i])
1976 			continue;
1977 
1978 		/* once here, the i-th attribute is provided by the
1979 		 * userspace AND it is identified optional as well.
1980 		 */
1981 		param = &seg6_action_params[i];
1982 
1983 		err = param->parse(attrs, slwt, extack);
1984 		if (err < 0)
1985 			goto parse_optattrs_err;
1986 
1987 		/* current attribute has been correctly parsed */
1988 		parsed_optattrs |= SEG6_F_ATTR(i);
1989 	}
1990 
1991 	/* store in the tunnel state all the optional attributed successfully
1992 	 * parsed.
1993 	 */
1994 	slwt->parsed_optattrs = parsed_optattrs;
1995 
1996 	return 0;
1997 
1998 parse_optattrs_err:
1999 	__destroy_attrs(parsed_optattrs, i, slwt);
2000 
2001 	return err;
2002 }
2003 
2004 /* call the custom constructor of the behavior during its initialization phase
2005  * and after that all its attributes have been parsed successfully.
2006  */
2007 static int
2008 seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
2009 				struct netlink_ext_ack *extack)
2010 {
2011 	struct seg6_action_desc *desc = slwt->desc;
2012 	struct seg6_local_lwtunnel_ops *ops;
2013 
2014 	ops = &desc->slwt_ops;
2015 	if (!ops->build_state)
2016 		return 0;
2017 
2018 	return ops->build_state(slwt, cfg, extack);
2019 }
2020 
2021 /* call the custom destructor of the behavior which is invoked before the
2022  * tunnel is going to be destroyed.
2023  */
2024 static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
2025 {
2026 	struct seg6_action_desc *desc = slwt->desc;
2027 	struct seg6_local_lwtunnel_ops *ops;
2028 
2029 	ops = &desc->slwt_ops;
2030 	if (!ops->destroy_state)
2031 		return;
2032 
2033 	ops->destroy_state(slwt);
2034 }
2035 
2036 static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt,
2037 			    struct netlink_ext_ack *extack)
2038 {
2039 	struct seg6_action_param *param;
2040 	struct seg6_action_desc *desc;
2041 	unsigned long invalid_attrs;
2042 	int i, err;
2043 
2044 	desc = __get_action_desc(slwt->action);
2045 	if (!desc)
2046 		return -EINVAL;
2047 
2048 	if (!desc->input)
2049 		return -EOPNOTSUPP;
2050 
2051 	slwt->desc = desc;
2052 	slwt->headroom += desc->static_headroom;
2053 
2054 	/* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
2055 	 * disjoined, this allow us to release acquired resources by optional
2056 	 * attributes and by required attributes independently from each other
2057 	 * without any interference.
2058 	 * In other terms, we are sure that we do not release some the acquired
2059 	 * resources twice.
2060 	 *
2061 	 * Note that if an attribute is configured both as required and as
2062 	 * optional, it means that the user has messed something up in the
2063 	 * seg6_action_table. Therefore, this check is required for SRv6
2064 	 * behaviors to work properly.
2065 	 */
2066 	invalid_attrs = desc->attrs & desc->optattrs;
2067 	if (invalid_attrs) {
2068 		WARN_ONCE(1,
2069 			  "An attribute cannot be both required AND optional");
2070 		return -EINVAL;
2071 	}
2072 
2073 	/* parse the required attributes */
2074 	for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2075 		if (desc->attrs & SEG6_F_ATTR(i)) {
2076 			if (!attrs[i])
2077 				return -EINVAL;
2078 
2079 			param = &seg6_action_params[i];
2080 
2081 			err = param->parse(attrs, slwt, extack);
2082 			if (err < 0)
2083 				goto parse_attrs_err;
2084 		}
2085 	}
2086 
2087 	/* parse the optional attributes, if any */
2088 	err = parse_nla_optional_attrs(attrs, slwt, extack);
2089 	if (err < 0)
2090 		goto parse_attrs_err;
2091 
2092 	return 0;
2093 
2094 parse_attrs_err:
2095 	/* release any resource that may have been acquired during the i-1
2096 	 * parse() operations.
2097 	 */
2098 	__destroy_attrs(desc->attrs, i, slwt);
2099 
2100 	return err;
2101 }
2102 
2103 static int seg6_local_build_state(struct net *net, struct nlattr *nla,
2104 				  unsigned int family, const void *cfg,
2105 				  struct lwtunnel_state **ts,
2106 				  struct netlink_ext_ack *extack)
2107 {
2108 	struct nlattr *tb[SEG6_LOCAL_MAX + 1];
2109 	struct lwtunnel_state *newts;
2110 	struct seg6_local_lwt *slwt;
2111 	int err;
2112 
2113 	if (family != AF_INET6)
2114 		return -EINVAL;
2115 
2116 	err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla,
2117 					  seg6_local_policy, extack);
2118 
2119 	if (err < 0)
2120 		return err;
2121 
2122 	if (!tb[SEG6_LOCAL_ACTION])
2123 		return -EINVAL;
2124 
2125 	newts = lwtunnel_state_alloc(sizeof(*slwt));
2126 	if (!newts)
2127 		return -ENOMEM;
2128 
2129 	slwt = seg6_local_lwtunnel(newts);
2130 	slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
2131 
2132 	err = parse_nla_action(tb, slwt, extack);
2133 	if (err < 0)
2134 		goto out_free;
2135 
2136 	err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
2137 	if (err < 0)
2138 		goto out_destroy_attrs;
2139 
2140 	newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
2141 	newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
2142 	newts->headroom = slwt->headroom;
2143 
2144 	*ts = newts;
2145 
2146 	return 0;
2147 
2148 out_destroy_attrs:
2149 	destroy_attrs(slwt);
2150 out_free:
2151 	kfree(newts);
2152 	return err;
2153 }
2154 
/* Tear down a seg6local tunnel state: the behavior-specific destructor
 * runs first, then the resources held by the attributes are released.
 */
static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
{
	struct seg6_local_lwt *slwt;

	slwt = seg6_local_lwtunnel(lwt);

	seg6_local_lwtunnel_destroy_state(slwt);
	destroy_attrs(slwt);
}
2165 
2166 static int seg6_local_fill_encap(struct sk_buff *skb,
2167 				 struct lwtunnel_state *lwt)
2168 {
2169 	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
2170 	struct seg6_action_param *param;
2171 	unsigned long attrs;
2172 	int i, err;
2173 
2174 	if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
2175 		return -EMSGSIZE;
2176 
2177 	attrs = slwt->desc->attrs | slwt->parsed_optattrs;
2178 
2179 	for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2180 		if (attrs & SEG6_F_ATTR(i)) {
2181 			param = &seg6_action_params[i];
2182 			err = param->put(skb, slwt);
2183 			if (err < 0)
2184 				return err;
2185 		}
2186 	}
2187 
2188 	return 0;
2189 }
2190 
2191 static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
2192 {
2193 	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
2194 	unsigned long attrs;
2195 	int nlsize;
2196 
2197 	nlsize = nla_total_size(4); /* action */
2198 
2199 	attrs = slwt->desc->attrs | slwt->parsed_optattrs;
2200 
2201 	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH))
2202 		nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
2203 
2204 	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE))
2205 		nlsize += nla_total_size(4);
2206 
2207 	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4))
2208 		nlsize += nla_total_size(4);
2209 
2210 	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6))
2211 		nlsize += nla_total_size(16);
2212 
2213 	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_IIF))
2214 		nlsize += nla_total_size(4);
2215 
2216 	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF))
2217 		nlsize += nla_total_size(4);
2218 
2219 	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF))
2220 		nlsize += nla_total_size(sizeof(struct nlattr)) +
2221 		       nla_total_size(MAX_PROG_NAME) +
2222 		       nla_total_size(4);
2223 
2224 	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE))
2225 		nlsize += nla_total_size(4);
2226 
2227 	if (attrs & SEG6_F_LOCAL_COUNTERS)
2228 		nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */
2229 			  /* SEG6_LOCAL_CNT_PACKETS */
2230 			  nla_total_size_64bit(sizeof(__u64)) +
2231 			  /* SEG6_LOCAL_CNT_BYTES */
2232 			  nla_total_size_64bit(sizeof(__u64)) +
2233 			  /* SEG6_LOCAL_CNT_ERRORS */
2234 			  nla_total_size_64bit(sizeof(__u64));
2235 
2236 	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_FLAVORS))
2237 		nlsize += encap_size_flavors(slwt);
2238 
2239 	return nlsize;
2240 }
2241 
2242 static int seg6_local_cmp_encap(struct lwtunnel_state *a,
2243 				struct lwtunnel_state *b)
2244 {
2245 	struct seg6_local_lwt *slwt_a, *slwt_b;
2246 	struct seg6_action_param *param;
2247 	unsigned long attrs_a, attrs_b;
2248 	int i;
2249 
2250 	slwt_a = seg6_local_lwtunnel(a);
2251 	slwt_b = seg6_local_lwtunnel(b);
2252 
2253 	if (slwt_a->action != slwt_b->action)
2254 		return 1;
2255 
2256 	attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
2257 	attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;
2258 
2259 	if (attrs_a != attrs_b)
2260 		return 1;
2261 
2262 	for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2263 		if (attrs_a & SEG6_F_ATTR(i)) {
2264 			param = &seg6_action_params[i];
2265 			if (param->cmp(slwt_a, slwt_b))
2266 				return 1;
2267 		}
2268 	}
2269 
2270 	return 0;
2271 }
2272 
2273 static const struct lwtunnel_encap_ops seg6_local_ops = {
2274 	.build_state	= seg6_local_build_state,
2275 	.destroy_state	= seg6_local_destroy_state,
2276 	.input		= seg6_local_input,
2277 	.fill_encap	= seg6_local_fill_encap,
2278 	.get_encap_size	= seg6_local_get_encap_size,
2279 	.cmp_encap	= seg6_local_cmp_encap,
2280 	.owner		= THIS_MODULE,
2281 };
2282 
2283 int __init seg6_local_init(void)
2284 {
2285 	/* If the max total number of defined attributes is reached, then your
2286 	 * kernel build stops here.
2287 	 *
2288 	 * This check is required to avoid arithmetic overflows when processing
2289 	 * behavior attributes and the maximum number of defined attributes
2290 	 * exceeds the allowed value.
2291 	 */
2292 	BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));
2293 
2294 	/* If the default NEXT-C-SID Locator-Block/Node Function lengths (in
2295 	 * bits) have been changed with invalid values, kernel build stops
2296 	 * here.
2297 	 */
2298 	BUILD_BUG_ON(next_csid_chk_cntr_bits(SEG6_LOCAL_LCBLOCK_DBITS,
2299 					     SEG6_LOCAL_LCNODE_FN_DBITS));
2300 	BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS));
2301 	BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS));
2302 
2303 	return lwtunnel_encap_add_ops(&seg6_local_ops,
2304 				      LWTUNNEL_ENCAP_SEG6_LOCAL);
2305 }
2306 
2307 void seg6_local_exit(void)
2308 {
2309 	lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
2310 }
2311