1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * SR-IPv6 implementation
4 *
5 * Authors:
6 * David Lebrun <david.lebrun@uclouvain.be>
7 * eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
8 */
9
10 #include <linux/filter.h>
11 #include <linux/types.h>
12 #include <linux/skbuff.h>
13 #include <linux/net.h>
14 #include <linux/module.h>
15 #include <net/ip.h>
16 #include <net/lwtunnel.h>
17 #include <net/netevent.h>
18 #include <net/netns/generic.h>
19 #include <net/ip6_fib.h>
20 #include <net/route.h>
21 #include <net/seg6.h>
22 #include <linux/seg6.h>
23 #include <linux/seg6_local.h>
24 #include <net/addrconf.h>
25 #include <net/ip6_route.h>
26 #include <net/dst_cache.h>
27 #include <net/ip_tunnels.h>
28 #ifdef CONFIG_IPV6_SEG6_HMAC
29 #include <net/seg6_hmac.h>
30 #endif
31 #include <net/seg6_local.h>
32 #include <linux/etherdevice.h>
33 #include <linux/bpf.h>
34 #include <linux/netfilter.h>
35
36 #define SEG6_F_ATTR(i) BIT(i)
37
38 struct seg6_local_lwt;
39
40 /* callbacks used for customizing the creation and destruction of a behavior */
/* callbacks used for customizing the creation and destruction of a behavior */
struct seg6_local_lwtunnel_ops {
	/* invoked while the lwtunnel state is being built; may reject the
	 * configuration by returning an error (optionally via @extack).
	 */
	int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
			   struct netlink_ext_ack *extack);
	/* invoked on teardown to release resources taken by build_state() */
	void (*destroy_state)(struct seg6_local_lwt *slwt);
};
46
/* static descriptor of an SRv6 behavior: its attribute requirements, the
 * per-packet processing entry point and optional lwtunnel state callbacks.
 */
struct seg6_action_desc {
	/* SEG6_LOCAL_ACTION_* identifier of the behavior */
	int action;
	/* bitmask (built with SEG6_F_ATTR) of required netlink attributes */
	unsigned long attrs;

	/* The optattrs field is used for specifying all the optional
	 * attributes supported by a specific behavior.
	 * It means that if one of these attributes is not provided in the
	 * netlink message during the behavior creation, no errors will be
	 * returned to the userspace.
	 *
	 * Each attribute can be only of two types (mutually exclusive):
	 * 1) required or 2) optional.
	 * Every user MUST obey to this rule! If you set an attribute as
	 * required the same attribute CANNOT be set as optional and vice
	 * versa.
	 */
	unsigned long optattrs;

	/* per-packet processing function of the behavior */
	int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	/* extra headroom associated with this behavior */
	int static_headroom;

	/* optional hooks for behavior-specific state (may be empty) */
	struct seg6_local_lwtunnel_ops slwt_ops;
};
70
/* pairs a loaded BPF program with its name (used by the eBPF-enabled
 * behavior support; see the "eBPF support" author note above).
 */
struct bpf_lwt_prog {
	struct bpf_prog *prog;
	char *name;
};
75
76 /* default length values (expressed in bits) for both Locator-Block and
77 * Locator-Node Function.
78 *
79 * Both SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS *must* be:
80 * i) greater than 0;
81 * ii) evenly divisible by 8. In other terms, the lengths of the
82 * Locator-Block and Locator-Node Function must be byte-aligned (we can
83 * relax this constraint in the future if really needed).
84 *
85 * Moreover, a third condition must hold:
86 * iii) SEG6_LOCAL_LCBLOCK_DBITS + SEG6_LOCAL_LCNODE_FN_DBITS <= 128.
87 *
88 * The correctness of SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS
89 * values are checked during the kernel compilation. If the compilation stops,
90 * check the value of these parameters to see if they meet conditions (i), (ii)
91 * and (iii).
92 */
#define SEG6_LOCAL_LCBLOCK_DBITS	32
#define SEG6_LOCAL_LCNODE_FN_DBITS	16

/* The following next_csid_chk_{cntr,lcblock,lcblock_fn}_bits macros can be
 * used directly to check whether the lengths (in bits) of Locator-Block and
 * Locator-Node Function are valid according to (i), (ii), (iii).
 */
/* true when condition (iii) is violated: blen + flen must not exceed 128 */
#define next_csid_chk_cntr_bits(blen, flen) \
	((blen) + (flen) > 128)

/* true when @blen violates (i) or (ii): non-zero, byte-aligned, and small
 * enough (<= 120) to leave room for a non-empty Locator-Node Function.
 */
#define next_csid_chk_lcblock_bits(blen) \
({ \
	typeof(blen) __tmp = blen; \
	(!__tmp || __tmp > 120 || (__tmp & 0x07)); \
})

/* the Locator-Node Function length obeys the same (i)/(ii) rules */
#define next_csid_chk_lcnode_fn_bits(flen) \
	next_csid_chk_lcblock_bits(flen)

/* flag indicating that flavors are set up for a given End* behavior */
#define SEG6_F_LOCAL_FLAVORS		SEG6_F_ATTR(SEG6_LOCAL_FLAVORS)

/* map a SEG6_LOCAL_FLV_OP_* operation onto its bit in the flavors mask */
#define SEG6_F_LOCAL_FLV_OP(flvname)	BIT(SEG6_LOCAL_FLV_OP_##flvname)
#define SEG6_F_LOCAL_FLV_NEXT_CSID	SEG6_F_LOCAL_FLV_OP(NEXT_CSID)
#define SEG6_F_LOCAL_FLV_PSP		SEG6_F_LOCAL_FLV_OP(PSP)

/* Supported RFC8986 Flavor operations are reported in this bitmask */
#define SEG6_LOCAL_FLV8986_SUPP_OPS	SEG6_F_LOCAL_FLV_PSP

/* per-behavior masks of flavor operations the kernel implements */
#define SEG6_LOCAL_END_FLV_SUPP_OPS	(SEG6_F_LOCAL_FLV_NEXT_CSID | \
					 SEG6_LOCAL_FLV8986_SUPP_OPS)
#define SEG6_LOCAL_END_X_FLV_SUPP_OPS	SEG6_F_LOCAL_FLV_NEXT_CSID
125
/* per-tunnel configuration for the SRv6 flavors framework */
struct seg6_flavors_info {
	/* Flavor operations (bitmask of SEG6_F_LOCAL_FLV_OP(...) bits) */
	__u32 flv_ops;

	/* Locator-Block length, expressed in bits */
	__u8 lcblock_bits;
	/* Locator-Node Function length, expressed in bits */
	__u8 lcnode_func_bits;
};
135
/* operating mode of the SRv6 End.DT4/DT6 behaviors */
enum seg6_end_dt_mode {
	DT_INVALID_MODE	= -EINVAL,
	DT_LEGACY_MODE	= 0,
	/* route through the VRF device bound to the configured table
	 * (set up by __seg6_end_dt_vrf_build()).
	 */
	DT_VRF_MODE	= 1,
};
141
/* runtime state of an SRv6 End.DT4/DT6 behavior instance */
struct seg6_end_dt_info {
	enum seg6_end_dt_mode mode;

	/* network namespace the behavior was created in */
	struct net *net;
	/* VRF device associated to the routing table used by the SRv6
	 * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
	 */
	int vrf_ifindex;
	int vrf_table;

	/* tunneled packet family (IPv4 or IPv6).
	 * Protocol and header length are inferred from family.
	 */
	u16 family;
};
157
/* per-CPU packet/byte/error counters of a behavior instance */
struct pcpu_seg6_local_counters {
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t errors;

	/* synchronizes reads of the 64-bit counters above */
	struct u64_stats_sync syncp;
};
165
166 /* This struct groups all the SRv6 Behavior counters supported so far.
167 *
168 * put_nla_counters() makes use of this data structure to collect all counter
169 * values after the per-CPU counter evaluation has been performed.
170 * Finally, each counter value (in seg6_local_counters) is stored in the
171 * corresponding netlink attribute and sent to user space.
172 *
173 * NB: we don't want to expose this structure to user space!
174 */
/* aggregated (cross-CPU) counter snapshot; see the comment above */
struct seg6_local_counters {
	__u64 packets;
	__u64 bytes;
	__u64 errors;
};

/* allocate zero-initialized per-CPU counters for a behavior instance */
#define seg6_local_alloc_pcpu_counters(__gfp) \
	__netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters, \
				  ((__gfp) | __GFP_ZERO))

#define SEG6_F_LOCAL_COUNTERS	SEG6_F_ATTR(SEG6_LOCAL_COUNTERS)
186
/* per-route state of a seg6local behavior, filled from the netlink
 * attributes supplied at creation time. Only the fields relevant to the
 * configured action are meaningful.
 */
struct seg6_local_lwt {
	/* SEG6_LOCAL_ACTION_* of this instance */
	int action;
	/* SRH provided via netlink, when the behavior uses one */
	struct ipv6_sr_hdr *srh;
	/* routing table id (e.g. used by End.T lookups) */
	int table;
	/* IPv4 nexthop; 0.0.0.0 means "use the inner DA" (see End.DX4) */
	struct in_addr nh4;
	/* IPv6 nexthop; :: means "use the inner DA" (see End.DX6) */
	struct in6_addr nh6;
	int iif;
	int oif;
	/* BPF program for the eBPF-enabled behavior */
	struct bpf_lwt_prog bpf;
#ifdef CONFIG_NET_L3_MASTER_DEV
	struct seg6_end_dt_info dt_info;
#endif
	struct seg6_flavors_info flv_info;

	struct pcpu_seg6_local_counters __percpu *pcpu_counters;

	int headroom;
	struct seg6_action_desc *desc;
	/* unlike the required attrs, we have to track the optional attributes
	 * that have been effectively parsed.
	 */
	unsigned long parsed_optattrs;
};
210
seg6_local_lwtunnel(struct lwtunnel_state * lwt)211 static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
212 {
213 return (struct seg6_local_lwt *)lwt->data;
214 }
215
/* Fetch the packet's SRH and, when HMAC support is compiled in, verify its
 * HMAC. Returns NULL (caller is expected to drop the packet) when no SRH is
 * found or HMAC validation fails.
 */
static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
{
	struct ipv6_sr_hdr *srh;

	srh = seg6_get_srh(skb, IP6_FH_F_SKIP_RH);
	if (!srh)
		return NULL;

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (!seg6_hmac_validate_skb(skb))
		return NULL;
#endif

	return srh;
}
231
/* Strip the outer IPv6 (and SRH, if any) encapsulation in place, leaving the
 * inner header of type @proto at the head of the skb.
 * Returns false (caller drops the packet) when an SRH is present with
 * segments_left > 0, when HMAC validation fails, or when the inner header
 * cannot be located/pulled.
 */
static bool decap_and_validate(struct sk_buff *skb, int proto)
{
	struct ipv6_sr_hdr *srh;
	unsigned int off = 0;

	srh = seg6_get_srh(skb, 0);
	/* decapsulation is only allowed on the final segment (SL == 0) */
	if (srh && srh->segments_left > 0)
		return false;

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (srh && !seg6_hmac_validate_skb(skb))
		return false;
#endif

	/* locate the start of the inner @proto header */
	if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
		return false;

	if (!pskb_pull(skb, off))
		return false;

	/* keep the checksum consistent with the bytes just removed */
	skb_postpull_rcsum(skb, skb_network_header(skb), off);

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	if (iptunnel_pull_offloads(skb))
		return false;

	return true;
}
261
advance_nextseg(struct ipv6_sr_hdr * srh,struct in6_addr * daddr)262 static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
263 {
264 struct in6_addr *addr;
265
266 srh->segments_left--;
267 addr = srh->segments + srh->segments_left;
268 *daddr = *addr;
269 }
270
/* Perform a route lookup for the packet and attach the resulting dst.
 *
 * @nhaddr:		when non-NULL, overrides the packet's DA as lookup key
 *			(and FLOWI_FLAG_KNOWN_NH is set);
 * @tbl_id:		non-zero forces the lookup into that routing table;
 * @local_delivery:	when false, routes pointing to a loopback device
 *			(i.e. local delivery) are discarded;
 * @oif:		optional output-interface constraint.
 *
 * When no acceptable route is found, the blackhole route is installed so
 * that the caller's subsequent dst_input() drops the packet.
 * Returns dst->error of the installed route (0 on success).
 */
static int
seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
			u32 tbl_id, bool local_delivery, int oif)
{
	struct net *net = dev_net(skb->dev);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct dst_entry *dst = NULL;
	struct rt6_info *rt;
	struct flowi6 fl6;
	int dev_flags = 0;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = skb->dev->ifindex;
	fl6.flowi6_oif = oif;
	fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
	fl6.saddr = hdr->saddr;
	fl6.flowlabel = ip6_flowinfo(hdr);
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = hdr->nexthdr;

	if (nhaddr)
		fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;

	if (!tbl_id && !oif) {
		/* unconstrained lookup: behave like regular input routing */
		dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
	} else if (tbl_id) {
		struct fib6_table *table;

		table = fib6_get_table(net, tbl_id);
		if (!table)
			goto out;

		rt = ip6_pol_route(net, table, oif, &fl6, skb, flags);
		dst = &rt->dst;
	} else {
		/* oif-only constraint: plain output route lookup */
		dst = ip6_route_output(net, NULL, &fl6);
	}

	/* we want to discard traffic destined for local packet processing,
	 * if @local_delivery is set to false.
	 */
	if (!local_delivery)
		dev_flags |= IFF_LOOPBACK;

	if (dst && (dst_dev(dst)->flags & dev_flags) && !dst->error) {
		dst_release(dst);
		dst = NULL;
	}

out:
	if (!dst) {
		/* fall back to the blackhole route: packet will be dropped */
		rt = net->ipv6.ip6_blk_hole_entry;
		dst = &rt->dst;
		dst_hold(dst);
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);
	return dst->error;
}
332
/* Convenience wrapper around seg6_lookup_any_nexthop(): no local delivery
 * and no output-interface constraint.
 */
int seg6_lookup_nexthop(struct sk_buff *skb,
			struct in6_addr *nhaddr, u32 tbl_id)
{
	const bool local_delivery = false;

	return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, local_delivery, 0);
}
338
seg6_flv_lcblock_octects(const struct seg6_flavors_info * finfo)339 static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo)
340 {
341 return finfo->lcblock_bits >> 3;
342 }
343
seg6_flv_lcnode_func_octects(const struct seg6_flavors_info * finfo)344 static __u8 seg6_flv_lcnode_func_octects(const struct seg6_flavors_info *finfo)
345 {
346 return finfo->lcnode_func_bits >> 3;
347 }
348
seg6_next_csid_is_arg_zero(const struct in6_addr * addr,const struct seg6_flavors_info * finfo)349 static bool seg6_next_csid_is_arg_zero(const struct in6_addr *addr,
350 const struct seg6_flavors_info *finfo)
351 {
352 __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
353 __u8 blk_octects = seg6_flv_lcblock_octects(finfo);
354 __u8 arg_octects;
355 int i;
356
357 arg_octects = 16 - blk_octects - fnc_octects;
358 for (i = 0; i < arg_octects; ++i) {
359 if (addr->s6_addr[blk_octects + fnc_octects + i] != 0x00)
360 return false;
361 }
362
363 return true;
364 }
365
/* assume that DA.Argument length > 0 */
static void seg6_next_csid_advance_arg(struct in6_addr *addr,
				       const struct seg6_flavors_info *finfo)
{
	__u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
	__u8 blk_octects = seg6_flv_lcblock_octects(finfo);

	/* advance DA.Argument: shift it left by one Function worth of octets
	 * so the next C-SID becomes the active Locator-Node Function.
	 */
	memmove(&addr->s6_addr[blk_octects],
		&addr->s6_addr[blk_octects + fnc_octects],
		16 - blk_octects - fnc_octects);

	/* zero-fill the tail octets freed by the shift */
	memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects);
}
380
input_action_end_finish(struct sk_buff * skb,struct seg6_local_lwt * slwt)381 static int input_action_end_finish(struct sk_buff *skb,
382 struct seg6_local_lwt *slwt)
383 {
384 seg6_lookup_nexthop(skb, NULL, 0);
385
386 return dst_input(skb);
387 }
388
input_action_end_core(struct sk_buff * skb,struct seg6_local_lwt * slwt)389 static int input_action_end_core(struct sk_buff *skb,
390 struct seg6_local_lwt *slwt)
391 {
392 struct ipv6_sr_hdr *srh;
393
394 srh = get_and_validate_srh(skb);
395 if (!srh)
396 goto drop;
397
398 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
399
400 return input_action_end_finish(skb, slwt);
401
402 drop:
403 kfree_skb(skb);
404 return -EINVAL;
405 }
406
end_next_csid_core(struct sk_buff * skb,struct seg6_local_lwt * slwt)407 static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
408 {
409 const struct seg6_flavors_info *finfo = &slwt->flv_info;
410 struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
411
412 if (seg6_next_csid_is_arg_zero(daddr, finfo))
413 return input_action_end_core(skb, slwt);
414
415 /* update DA */
416 seg6_next_csid_advance_arg(daddr, finfo);
417
418 return input_action_end_finish(skb, slwt);
419 }
420
input_action_end_x_finish(struct sk_buff * skb,struct seg6_local_lwt * slwt)421 static int input_action_end_x_finish(struct sk_buff *skb,
422 struct seg6_local_lwt *slwt)
423 {
424 seg6_lookup_any_nexthop(skb, &slwt->nh6, 0, false, slwt->oif);
425
426 return dst_input(skb);
427 }
428
input_action_end_x_core(struct sk_buff * skb,struct seg6_local_lwt * slwt)429 static int input_action_end_x_core(struct sk_buff *skb,
430 struct seg6_local_lwt *slwt)
431 {
432 struct ipv6_sr_hdr *srh;
433
434 srh = get_and_validate_srh(skb);
435 if (!srh)
436 goto drop;
437
438 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
439
440 return input_action_end_x_finish(skb, slwt);
441
442 drop:
443 kfree_skb(skb);
444 return -EINVAL;
445 }
446
end_x_next_csid_core(struct sk_buff * skb,struct seg6_local_lwt * slwt)447 static int end_x_next_csid_core(struct sk_buff *skb,
448 struct seg6_local_lwt *slwt)
449 {
450 const struct seg6_flavors_info *finfo = &slwt->flv_info;
451 struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
452
453 if (seg6_next_csid_is_arg_zero(daddr, finfo))
454 return input_action_end_x_core(skb, slwt);
455
456 /* update DA */
457 seg6_next_csid_advance_arg(daddr, finfo);
458
459 return input_action_end_x_finish(skb, slwt);
460 }
461
seg6_next_csid_enabled(__u32 fops)462 static bool seg6_next_csid_enabled(__u32 fops)
463 {
464 return fops & SEG6_F_LOCAL_FLV_NEXT_CSID;
465 }
466
467 /* Processing of SRv6 End, End.X, and End.T behaviors can be extended through
468 * the flavors framework. These behaviors must report the subset of (flavor)
469 * operations they currently implement. In this way, if a user specifies a
470 * flavor combination that is not supported by a given End* behavior, the
471 * kernel refuses to instantiate the tunnel reporting the error.
472 */
/* Report in *@fops the flavor operations supported by @action.
 * Returns -EOPNOTSUPP for behaviors with no flavor support.
 */
static int seg6_flv_supp_ops_by_action(int action, __u32 *fops)
{
	if (action == SEG6_LOCAL_ACTION_END) {
		*fops = SEG6_LOCAL_END_FLV_SUPP_OPS;
		return 0;
	}

	if (action == SEG6_LOCAL_ACTION_END_X) {
		*fops = SEG6_LOCAL_END_X_FLV_SUPP_OPS;
		return 0;
	}

	return -EOPNOTSUPP;
}
488
489 /* We describe the packet state in relation to the absence/presence of the SRH
490 * and the Segment Left (SL) field.
491 * For our purposes, it is not necessary to record the exact value of the SL
492 * when the SID List consists of two or more segments.
493 */
enum seg6_local_pktinfo {
	/* the order really matters! */
	SEG6_LOCAL_PKTINFO_NOHDR = 0,	/* no SRH in the packet */
	SEG6_LOCAL_PKTINFO_SL_ZERO,	/* SRH present, SL == 0 */
	SEG6_LOCAL_PKTINFO_SL_ONE,	/* SRH present, SL == 1 */
	SEG6_LOCAL_PKTINFO_SL_MORE,	/* SRH present, SL >= 2 */
	__SEG6_LOCAL_PKTINFO_MAX,
};

#define SEG6_LOCAL_PKTINFO_MAX (__SEG6_LOCAL_PKTINFO_MAX - 1)
504
seg6_get_srh_pktinfo(struct ipv6_sr_hdr * srh)505 static enum seg6_local_pktinfo seg6_get_srh_pktinfo(struct ipv6_sr_hdr *srh)
506 {
507 __u8 sgl;
508
509 if (!srh)
510 return SEG6_LOCAL_PKTINFO_NOHDR;
511
512 sgl = srh->segments_left;
513 if (sgl < 2)
514 return SEG6_LOCAL_PKTINFO_SL_ZERO + sgl;
515
516 return SEG6_LOCAL_PKTINFO_SL_MORE;
517 }
518
/* actions the RFC8986 flavors engine can apply to a packet */
enum seg6_local_flv_action {
	SEG6_LOCAL_FLV_ACT_UNSPEC = 0,	/* no suitable action: drop */
	SEG6_LOCAL_FLV_ACT_END,		/* standard Endpoint processing */
	SEG6_LOCAL_FLV_ACT_PSP,		/* Penultimate Segment Pop */
	SEG6_LOCAL_FLV_ACT_USP,
	SEG6_LOCAL_FLV_ACT_USD,
	__SEG6_LOCAL_FLV_ACT_MAX
};

#define SEG6_LOCAL_FLV_ACT_MAX (__SEG6_LOCAL_FLV_ACT_MAX - 1)
529
530 /* The action table for RFC8986 flavors (see the flv8986_act_tbl below)
531 * contains the actions (i.e. processing operations) to be applied on packets
532 * when flavors are configured for an End* behavior.
533 * By combining the pkinfo data and from the flavors mask, the macro
534 * computes the index used to access the elements (actions) stored in the
535 * action table. The index is structured as follows:
536 *
537 * index
538 * _______________/\________________
539 * / \
540 * +----------------+----------------+
541 * | pf | afm |
542 * +----------------+----------------+
543 * ph-1 ... p1 p0 fk-1 ... f1 f0
544 * MSB LSB
545 *
546 * where:
547 * - 'afm' (adjusted flavor mask) is the mask containing a combination of the
548 * RFC8986 flavors currently supported. 'afm' corresponds to the @fm
549 * argument of the macro whose value is righ-shifted by 1 bit. By doing so,
550 * we discard the SEG6_LOCAL_FLV_OP_UNSPEC flag (bit 0 in @fm) which is
551 * never used here;
552 * - 'pf' encodes the packet info (pktinfo) regarding the presence/absence of
553 * the SRH, SL = 0, etc. 'pf' is set with the value of @pf provided as
554 * argument to the macro.
555 */
/* compute the action-table index for pktinfo @pf and flavors mask @fm;
 * see the layout description in the comment block above.
 */
#define flv8986_act_tbl_idx(pf, fm)					\
	((((pf) << bits_per(SEG6_LOCAL_FLV8986_SUPP_OPS)) |		\
	  ((fm) & SEG6_LOCAL_FLV8986_SUPP_OPS)) >> SEG6_LOCAL_FLV_OP_PSP)

/* We compute the size of the action table by considering the RFC8986 flavors
 * actually supported by the kernel. In this way, the size is automatically
 * adjusted when new flavors are supported.
 */
#define FLV8986_ACT_TBL_SIZE						\
	roundup_pow_of_two(flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_MAX,	\
					       SEG6_LOCAL_FLV8986_SUPP_OPS))

/* tbl_cfg(act, pf, fm) macro is used to easily configure the action
 * table; it accepts 3 arguments:
 *  i) @act, the suffix from SEG6_LOCAL_FLV_ACT_{act} representing
 *     the action that should be applied on the packet;
 * ii) @pf, the suffix from SEG6_LOCAL_PKTINFO_{pf} reporting the packet
 *     info about the lack/presence of SRH, SRH with SL = 0, etc;
 * iii) @fm, the mask of flavors.
 */
#define tbl_cfg(act, pf, fm)						\
	[flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_##pf,			\
			     (fm))] = SEG6_LOCAL_FLV_ACT_##act

/* shorthand for improving readability */
#define F_PSP	SEG6_F_LOCAL_FLV_PSP
582
583 /* The table contains, for each combination of the pktinfo data and
584 * flavors, the action that should be taken on a packet (e.g.
585 * "standard" Endpoint processing, Penultimate Segment Pop, etc).
586 *
587 * By default, table entries not explicitly configured are initialized with the
588 * SEG6_LOCAL_FLV_ACT_UNSPEC action, which generally has the effect of
589 * discarding the processed packet.
590 */
/* entries not configured below default to SEG6_LOCAL_FLV_ACT_UNSPEC (drop) */
static const u8 flv8986_act_tbl[FLV8986_ACT_TBL_SIZE] = {
	/* PSP variant for packet where SRH with SL = 1 */
	tbl_cfg(PSP, SL_ONE, F_PSP),
	/* End for packet where the SRH with SL > 1 */
	tbl_cfg(END, SL_MORE, F_PSP),
};

#undef F_PSP
#undef tbl_cfg
600
601 /* For each flavor defined in RFC8986 (or a combination of them) an action is
602 * performed on the packet. The specific action depends on:
603 * - info extracted from the packet (i.e. pktinfo data) regarding the
604 * lack/presence of the SRH, and if the SRH is available, on the value of
605 * Segment Left field;
606 * - the mask of flavors configured for the specific SRv6 End* behavior.
607 *
608 * The function combines both the pkinfo and the flavors mask to evaluate the
609 * corresponding action to be taken on the packet.
610 */
611 static enum seg6_local_flv_action
seg6_local_flv8986_act_lookup(enum seg6_local_pktinfo pinfo,__u32 flvmask)612 seg6_local_flv8986_act_lookup(enum seg6_local_pktinfo pinfo, __u32 flvmask)
613 {
614 unsigned long index;
615
616 /* check if the provided mask of flavors is supported */
617 if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS))
618 return SEG6_LOCAL_FLV_ACT_UNSPEC;
619
620 index = flv8986_act_tbl_idx(pinfo, flvmask);
621 if (unlikely(index >= FLV8986_ACT_TBL_SIZE))
622 return SEG6_LOCAL_FLV_ACT_UNSPEC;
623
624 return flv8986_act_tbl[index];
625 }
626
/* skb->data must be aligned with skb->network_header.
 *
 * Remove the SRH located at offset @srhoff, repairing the nexthdr chain,
 * the IPv6 payload length, the receive checksum and the network/transport/
 * mac header offsets. Returns false (caller drops the packet) when the SRH
 * cannot be pulled, the skb cannot be made writable, or the header layout
 * is inconsistent.
 */
static bool seg6_pop_srh(struct sk_buff *skb, int srhoff)
{
	struct ipv6_sr_hdr *srh;
	struct ipv6hdr *iph;
	__u8 srh_nexthdr;
	int thoff = -1;
	int srhlen;
	int nhlen;

	/* the SRH cannot start inside the fixed IPv6 header, and its fixed
	 * part must be available in the linear area.
	 */
	if (unlikely(srhoff < sizeof(*iph) ||
		     !pskb_may_pull(skb, srhoff + sizeof(*srh))))
		return false;

	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
	srhlen = ipv6_optlen(srh);

	/* we are about to mangle the pkt, let's check if we can write on it */
	if (unlikely(skb_ensure_writable(skb, srhoff + srhlen)))
		return false;

	/* skb_ensure_writable() may change skb pointers; evaluate srh again */
	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
	srh_nexthdr = srh->nexthdr;

	if (unlikely(!skb_transport_header_was_set(skb)))
		goto pull;

	nhlen = skb_network_header_len(skb);
	/* we have to deal with the transport header: it could be set before
	 * the SRH, after the SRH, or within it (which is considered wrong,
	 * however).
	 */
	if (likely(nhlen <= srhoff))
		thoff = nhlen;
	else if (nhlen >= srhoff + srhlen)
		/* transport_header is set after the SRH */
		thoff = nhlen - srhlen;
	else
		/* transport_header falls inside the SRH; hence, we can't
		 * restore the transport_header pointer properly after
		 * SRH removing operation.
		 */
		return false;
pull:
	/* we need to pop the SRH:
	 * 1) first of all, we pull out everything from IPv6 header up to SRH
	 * (included) evaluating also the rcsum;
	 * 2) we overwrite (and then remove) the SRH by properly moving the
	 * IPv6 along with any extension header that precedes the SRH;
	 * 3) At the end, we push back the pulled headers (except for SRH,
	 * obviously).
	 */
	skb_pull_rcsum(skb, srhoff + srhlen);
	memmove(skb_network_header(skb) + srhlen, skb_network_header(skb),
		srhoff);
	skb_push(skb, srhoff);

	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	if (likely(thoff >= 0))
		skb_set_transport_header(skb, thoff);

	iph = ipv6_hdr(skb);
	if (iph->nexthdr == NEXTHDR_ROUTING) {
		/* the SRH immediately followed the IPv6 header */
		iph->nexthdr = srh_nexthdr;
	} else {
		/* we must look for the extension header (EXTH, for short) that
		 * immediately precedes the SRH we have just removed.
		 * Then, we update the value of the EXTH nexthdr with the one
		 * contained in the SRH nexthdr.
		 */
		unsigned int off = sizeof(*iph);
		struct ipv6_opt_hdr *hp, _hdr;
		__u8 nexthdr = iph->nexthdr;

		for (;;) {
			if (unlikely(!ipv6_ext_hdr(nexthdr) ||
				     nexthdr == NEXTHDR_NONE))
				return false;

			hp = skb_header_pointer(skb, off, sizeof(_hdr), &_hdr);
			if (unlikely(!hp))
				return false;

			if (hp->nexthdr == NEXTHDR_ROUTING) {
				hp->nexthdr = srh_nexthdr;
				break;
			}

			switch (nexthdr) {
			case NEXTHDR_FRAGMENT:
				fallthrough;
			case NEXTHDR_AUTH:
				/* we expect SRH before FRAG and AUTH */
				return false;
			default:
				off += ipv6_optlen(hp);
				break;
			}

			nexthdr = hp->nexthdr;
		}
	}

	/* account for the srhlen bytes removed from the payload */
	iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	skb_postpush_rcsum(skb, iph, srhoff);

	return true;
}
738
/* process the packet on the basis of the RFC8986 flavors set for the given
 * SRv6 End behavior instance.
 */
static int end_flv8986_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	const struct seg6_flavors_info *finfo = &slwt->flv_info;
	enum seg6_local_flv_action action;
	enum seg6_local_pktinfo pinfo;
	struct ipv6_sr_hdr *srh;
	__u32 flvmask;
	int srhoff;

	srh = seg6_get_srh(skb, 0);
	/* remember the SRH offset: needed if PSP has to pop the header */
	srhoff = srh ? ((unsigned char *)srh - skb->data) : 0;
	pinfo = seg6_get_srh_pktinfo(srh);
#ifdef CONFIG_IPV6_SEG6_HMAC
	if (srh && !seg6_hmac_validate_skb(skb))
		goto drop;
#endif
	flvmask = finfo->flv_ops;
	if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS)) {
		pr_warn_once("seg6local: invalid RFC8986 flavors\n");
		goto drop;
	}

	/* retrieve the action triggered by the combination of pktinfo data and
	 * the flavors mask.
	 */
	action = seg6_local_flv8986_act_lookup(pinfo, flvmask);
	switch (action) {
	case SEG6_LOCAL_FLV_ACT_END:
		/* process the packet as the "standard" End behavior */
		advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
		break;
	case SEG6_LOCAL_FLV_ACT_PSP:
		/* Penultimate Segment Pop: activate the last segment, then
		 * strip the SRH from the packet.
		 */
		advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

		if (unlikely(!seg6_pop_srh(skb, srhoff)))
			goto drop;
		break;
	case SEG6_LOCAL_FLV_ACT_UNSPEC:
		fallthrough;
	default:
		/* by default, we drop the packet since we could not find a
		 * suitable action.
		 */
		goto drop;
	}

	return input_action_end_finish(skb, slwt);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
794
795 /* regular endpoint function */
input_action_end(struct sk_buff * skb,struct seg6_local_lwt * slwt)796 static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
797 {
798 const struct seg6_flavors_info *finfo = &slwt->flv_info;
799 __u32 fops = finfo->flv_ops;
800
801 if (!fops)
802 return input_action_end_core(skb, slwt);
803
804 /* check for the presence of NEXT-C-SID since it applies first */
805 if (seg6_next_csid_enabled(fops))
806 return end_next_csid_core(skb, slwt);
807
808 /* the specific processing function to be performed on the packet
809 * depends on the combination of flavors defined in RFC8986 and some
810 * information extracted from the packet, e.g. presence/absence of SRH,
811 * Segment Left = 0, etc.
812 */
813 return end_flv8986_core(skb, slwt);
814 }
815
816 /* regular endpoint, and forward to specified nexthop */
input_action_end_x(struct sk_buff * skb,struct seg6_local_lwt * slwt)817 static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
818 {
819 const struct seg6_flavors_info *finfo = &slwt->flv_info;
820 __u32 fops = finfo->flv_ops;
821
822 /* check for the presence of NEXT-C-SID since it applies first */
823 if (seg6_next_csid_enabled(fops))
824 return end_x_next_csid_core(skb, slwt);
825
826 return input_action_end_x_core(skb, slwt);
827 }
828
input_action_end_t(struct sk_buff * skb,struct seg6_local_lwt * slwt)829 static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
830 {
831 struct ipv6_sr_hdr *srh;
832
833 srh = get_and_validate_srh(skb);
834 if (!srh)
835 goto drop;
836
837 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
838
839 seg6_lookup_nexthop(skb, NULL, slwt->table);
840
841 return dst_input(skb);
842
843 drop:
844 kfree_skb(skb);
845 return -EINVAL;
846 }
847
/* decapsulate and forward inner L2 frame on specified interface.
 * The egress device (slwt->oif) must be an UP Ethernet device with carrier;
 * oversized or LRO-aggregated frames are dropped.
 */
static int input_action_end_dx2(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct net *net = dev_net(skb->dev);
	struct net_device *odev;
	struct ethhdr *eth;

	if (!decap_and_validate(skb, IPPROTO_ETHERNET))
		goto drop;

	/* the inner Ethernet header must be in the linear area */
	if (!pskb_may_pull(skb, ETH_HLEN))
		goto drop;

	skb_reset_mac_header(skb);
	eth = (struct ethhdr *)skb->data;

	/* To determine the frame's protocol, we assume it is 802.3. This avoids
	 * a call to eth_type_trans(), which is not really relevant for our
	 * use case.
	 */
	if (!eth_proto_is_802_3(eth->h_proto))
		goto drop;

	odev = dev_get_by_index_rcu(net, slwt->oif);
	if (!odev)
		goto drop;

	/* As we accept Ethernet frames, make sure the egress device is of
	 * the correct type.
	 */
	if (odev->type != ARPHRD_ETHER)
		goto drop;

	if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
		goto drop;

	/* the frame leaves the local stack: detach it from its socket */
	skb_orphan(skb);

	if (skb_warn_if_lro(skb))
		goto drop;

	skb_forward_csum(skb);

	/* drop frames whose L2 payload exceeds the egress MTU */
	if (skb->len - ETH_HLEN > odev->mtu)
		goto drop;

	skb->dev = odev;
	skb->protocol = eth->h_proto;

	return dev_queue_xmit(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
904
input_action_end_dx6_finish(struct net * net,struct sock * sk,struct sk_buff * skb)905 static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
906 struct sk_buff *skb)
907 {
908 struct dst_entry *orig_dst = skb_dst(skb);
909 struct in6_addr *nhaddr = NULL;
910 struct seg6_local_lwt *slwt;
911
912 slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
913
914 /* The inner packet is not associated to any local interface,
915 * so we do not call netif_rx().
916 *
917 * If slwt->nh6 is set to ::, then lookup the nexthop for the
918 * inner packet's DA. Otherwise, use the specified nexthop.
919 */
920 if (!ipv6_addr_any(&slwt->nh6))
921 nhaddr = &slwt->nh6;
922
923 seg6_lookup_nexthop(skb, nhaddr, 0);
924
925 return dst_input(skb);
926 }
927
/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	/* this function accepts IPv6 encapsulated packets, with either
	 * an SRH with SL=0, or no SRH.
	 */

	if (!decap_and_validate(skb, IPPROTO_IPV6))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto drop;

	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
	/* the inner packet starts a fresh conntrack lifecycle */
	nf_reset_ct(skb);

	/* if lwtunnel netfilter hooks are enabled, traverse PRE_ROUTING
	 * before finishing the End.DX6 processing.
	 */
	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
			       dev_net(skb->dev), NULL, skb, skb->dev,
			       NULL, input_action_end_dx6_finish);

	return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
955
/* Finish End.DX4: reroute the decapsulated IPv4 packet to the configured
 * nexthop (or to its own DA when no nexthop was configured).
 */
static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
				       struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	enum skb_drop_reason reason;
	struct seg6_local_lwt *slwt;
	struct iphdr *iph;
	__be32 nhaddr;

	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);

	iph = ip_hdr(skb);

	/* a nexthop of 0.0.0.0 means "use the inner packet's DA" */
	nhaddr = slwt->nh4.s_addr ?: iph->daddr;

	/* drop the seg6local dst before the fresh IPv4 input route lookup */
	skb_dst_drop(skb);

	reason = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
	if (reason) {
		kfree_skb_reason(skb, reason);
		return -EINVAL;
	}

	return dst_input(skb);
}
981
/* End.DX4: decapsulate the inner IPv4 packet and forward it to the
 * configured nexthop (mirrors input_action_end_dx6 for IPv4).
 */
static int input_action_end_dx4(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	if (!decap_and_validate(skb, IPPROTO_IPIP))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto drop;

	/* the packet is now a plain IPv4 one */
	skb->protocol = htons(ETH_P_IP);
	skb_set_transport_header(skb, sizeof(struct iphdr));
	/* the inner packet starts a fresh conntrack lifecycle */
	nf_reset_ct(skb);

	/* if lwtunnel netfilter hooks are enabled, traverse PRE_ROUTING
	 * before finishing the End.DX4 processing.
	 */
	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
			       dev_net(skb->dev), NULL, skb, skb->dev,
			       NULL, input_action_end_dx4_finish);

	return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
1005
1006 #ifdef CONFIG_NET_L3_MASTER_DEV
fib6_config_get_net(const struct fib6_config * fib6_cfg)1007 static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
1008 {
1009 const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
1010
1011 return nli->nl_net;
1012 }
1013
/* Bind an End.DT4/DT6 instance (VRF mode) to the VRF device associated with
 * the configured routing table. The lookup requires VRF strict mode (hence
 * the -EPERM message below). Returns 0 on success or a negative errno,
 * reported via @extack where meaningful.
 */
static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
				   u16 family, struct netlink_ext_ack *extack)
{
	struct seg6_end_dt_info *info = &slwt->dt_info;
	int vrf_ifindex;
	struct net *net;

	net = fib6_config_get_net(cfg);

	/* note that vrf_table was already set by parse_nla_vrftable() */
	vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
							info->vrf_table);
	if (vrf_ifindex < 0) {
		if (vrf_ifindex == -EPERM) {
			NL_SET_ERR_MSG(extack,
				       "Strict mode for VRF is disabled");
		} else if (vrf_ifindex == -ENODEV) {
			NL_SET_ERR_MSG(extack,
				       "Table has no associated VRF device");
		} else {
			pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
				 vrf_ifindex);
		}

		return vrf_ifindex;
	}

	info->net = net;
	info->vrf_ifindex = vrf_ifindex;

	info->family = family;
	info->mode = DT_VRF_MODE;

	return 0;
}
1049
1050 /* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
1051 * routes the IPv4/IPv6 packet by looking at the configured routing table.
1052 *
1053 * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
1054 * Routing Header packets) from several interfaces and the outer IPv6
1055 * destination address (DA) is used for retrieving the specific instance of the
1056 * End.DT4/DT6 behavior that should process the packets.
1057 *
1058 * However, the inner IPv4/IPv6 packet is not really bound to any receiving
1059 * interface and thus the End.DT4/DT6 sets the VRF (associated with the
1060 * corresponding routing table) as the *receiving* interface.
1061 * In other words, the End.DT4/DT6 processes a packet as if it has been received
1062 * directly by the VRF (and not by one of its slave devices, if any).
 * In this way, the VRF interface is used for routing the IPv4/IPv6 packet
 * according to the routing table configured by the End.DT4/DT6 instance.
1065 *
1066 * This design allows you to get some interesting features like:
1067 * 1) the statistics on rx packets;
1068 * 2) the possibility to install a packet sniffer on the receiving interface
1069 * (the VRF one) for looking at the incoming packets;
1070 * 3) the possibility to leverage the netfilter prerouting hook for the inner
1071 * IPv4 packet.
1072 *
1073 * This function returns:
1074 * - the sk_buff* when the VRF rcv handler has processed the packet correctly;
1075 * - NULL when the skb is consumed by the VRF rcv handler;
1076 * - a pointer which encodes a negative error number in case of error.
1077 * Note that in this case, the function takes care of freeing the skb.
1078 */
/* Hand the decapsulated inner packet to the VRF (L3 master) rcv handler;
 * the return convention is documented in the comment block above.
 */
static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
				      struct net_device *dev)
{
	/* based on l3mdev_ip_rcv; we are only interested in the master */
	if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
		goto drop;

	if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
		goto drop;

	/* the decap packet IPv4/IPv6 does not come with any mac header info.
	 * We must unset the mac header to allow the VRF device to rebuild it,
	 * just in case there is a sniffer attached on the device.
	 */
	skb_unset_mac_header(skb);

	skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
	if (!skb)
		/* the skb buffer was consumed by the handler */
		return NULL;

	/* when a packet is received by a VRF or by one of its slaves, the
	 * master device reference is set into the skb.
	 */
	if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
		goto drop;

	return skb;

drop:
	kfree_skb(skb);
	return ERR_PTR(-EINVAL);
}
1112
end_dt_get_vrf_rcu(struct sk_buff * skb,struct seg6_end_dt_info * info)1113 static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
1114 struct seg6_end_dt_info *info)
1115 {
1116 int vrf_ifindex = info->vrf_ifindex;
1117 struct net *net = info->net;
1118
1119 if (unlikely(vrf_ifindex < 0))
1120 goto error;
1121
1122 if (unlikely(!net_eq(dev_net(skb->dev), net)))
1123 goto error;
1124
1125 return dev_get_by_index_rcu(net, vrf_ifindex);
1126
1127 error:
1128 return NULL;
1129 }
1130
/* Common VRF-mode part of the End.DT4/DT6/DT46 behaviors: prepare the inner
 * packet (protocol, transport header) for the given @family and inject it
 * into the VRF device resolved from slwt->dt_info.
 * Same return convention as end_dt_vrf_rcv().
 */
static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
				       struct seg6_local_lwt *slwt, u16 family)
{
	struct seg6_end_dt_info *info = &slwt->dt_info;
	struct net_device *vrf;
	__be16 protocol;
	int hdrlen;

	vrf = end_dt_get_vrf_rcu(skb, info);
	if (unlikely(!vrf))
		goto drop;

	switch (family) {
	case AF_INET:
		protocol = htons(ETH_P_IP);
		hdrlen = sizeof(struct iphdr);
		break;
	case AF_INET6:
		protocol = htons(ETH_P_IPV6);
		hdrlen = sizeof(struct ipv6hdr);
		break;
	case AF_UNSPEC:
		fallthrough;
	default:
		goto drop;
	}

	/* an instance built for a specific family must not process the other
	 * one; AF_UNSPEC instances (End.DT46) accept both
	 */
	if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
		pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
		goto drop;
	}

	skb->protocol = protocol;

	skb_dst_drop(skb);

	skb_set_transport_header(skb, hdrlen);
	nf_reset_ct(skb);

	return end_dt_vrf_rcv(skb, family, vrf);

drop:
	kfree_skb(skb);
	return ERR_PTR(-EINVAL);
}
1176
input_action_end_dt4(struct sk_buff * skb,struct seg6_local_lwt * slwt)1177 static int input_action_end_dt4(struct sk_buff *skb,
1178 struct seg6_local_lwt *slwt)
1179 {
1180 enum skb_drop_reason reason;
1181 struct iphdr *iph;
1182
1183 if (!decap_and_validate(skb, IPPROTO_IPIP))
1184 goto drop;
1185
1186 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
1187 goto drop;
1188
1189 skb = end_dt_vrf_core(skb, slwt, AF_INET);
1190 if (!skb)
1191 /* packet has been processed and consumed by the VRF */
1192 return 0;
1193
1194 if (IS_ERR(skb))
1195 return PTR_ERR(skb);
1196
1197 iph = ip_hdr(skb);
1198
1199 reason = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
1200 if (unlikely(reason))
1201 goto drop;
1202
1203 return dst_input(skb);
1204
1205 drop:
1206 kfree_skb(skb);
1207 return -EINVAL;
1208 }
1209
/* build_state callback for End.DT4: always VRF mode with family AF_INET */
static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
			      struct netlink_ext_ack *extack)
{
	return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
}
1215
1216 static enum
seg6_end_dt6_parse_mode(struct seg6_local_lwt * slwt)1217 seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
1218 {
1219 unsigned long parsed_optattrs = slwt->parsed_optattrs;
1220 bool legacy, vrfmode;
1221
1222 legacy = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE));
1223 vrfmode = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE));
1224
1225 if (!(legacy ^ vrfmode))
1226 /* both are absent or present: invalid DT6 mode */
1227 return DT_INVALID_MODE;
1228
1229 return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
1230 }
1231
/* return the End.DT6 mode recorded at build time in dt_info */
static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *info = &slwt->dt_info;

	return info->mode;
}
1238
seg6_end_dt6_build(struct seg6_local_lwt * slwt,const void * cfg,struct netlink_ext_ack * extack)1239 static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
1240 struct netlink_ext_ack *extack)
1241 {
1242 enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
1243 struct seg6_end_dt_info *info = &slwt->dt_info;
1244
1245 switch (mode) {
1246 case DT_LEGACY_MODE:
1247 info->mode = DT_LEGACY_MODE;
1248 return 0;
1249 case DT_VRF_MODE:
1250 return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
1251 default:
1252 NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
1253 return -EINVAL;
1254 }
1255 }
1256 #endif
1257
/* SRv6 End.DT6 behavior: decapsulate the inner IPv6 packet and route it
 * either through the VRF bound to the vrftable (VRF mode, available only
 * with CONFIG_NET_L3_MASTER_DEV) or through the configured routing table
 * (legacy mode).
 */
static int input_action_end_dt6(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	if (!decap_and_validate(skb, IPPROTO_IPV6))
		goto drop;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto drop;

#ifdef CONFIG_NET_L3_MASTER_DEV
	if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
		goto legacy_mode;

	/* DT6_VRF_MODE */
	skb = end_dt_vrf_core(skb, slwt, AF_INET6);
	if (!skb)
		/* packet has been processed and consumed by the VRF */
		return 0;

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* note: this time we do not need to specify the table because the VRF
	 * takes care of selecting the correct table.
	 */
	seg6_lookup_any_nexthop(skb, NULL, 0, true, 0);

	return dst_input(skb);

legacy_mode:
#endif
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_any_nexthop(skb, NULL, slwt->table, true, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
1299
1300 #ifdef CONFIG_NET_L3_MASTER_DEV
/* build_state callback for End.DT46: VRF mode accepting both IPv4 and IPv6
 * inner packets (family AF_UNSPEC)
 */
static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
			       struct netlink_ext_ack *extack)
{
	return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
}
1306
input_action_end_dt46(struct sk_buff * skb,struct seg6_local_lwt * slwt)1307 static int input_action_end_dt46(struct sk_buff *skb,
1308 struct seg6_local_lwt *slwt)
1309 {
1310 unsigned int off = 0;
1311 int nexthdr;
1312
1313 nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
1314 if (unlikely(nexthdr < 0))
1315 goto drop;
1316
1317 switch (nexthdr) {
1318 case IPPROTO_IPIP:
1319 return input_action_end_dt4(skb, slwt);
1320 case IPPROTO_IPV6:
1321 return input_action_end_dt6(skb, slwt);
1322 }
1323
1324 drop:
1325 kfree_skb(skb);
1326 return -EINVAL;
1327 }
1328 #endif
1329
1330 /* push an SRH on top of the current one */
/* SRv6 End.B6 behavior: insert the configured SRH inline on top of the
 * current one and re-route. Returns 0 on success or a negative errno;
 * the skb is freed on error.
 */
static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;
	int err = -EINVAL;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	err = seg6_do_srh_inline(skb, slwt->srh);
	if (err)
		goto drop;

	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return err;
}
1354
1355 /* encapsulate within an outer IPv6 header and a specified SRH */
/* SRv6 End.B6.Encaps behavior: advance to the next segment, then
 * encapsulate the packet within an outer IPv6 header carrying the
 * configured SRH, and re-route. The skb is freed on error.
 */
static int input_action_end_b6_encap(struct sk_buff *skb,
				     struct seg6_local_lwt *slwt)
{
	struct ipv6_sr_hdr *srh;
	int err = -EINVAL;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	/* the current packet becomes the inner one of the new encapsulation */
	skb_reset_inner_headers(skb);
	skb->encapsulation = 1;

	err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
	if (err)
		goto drop;

	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return err;
}
1385
/* per-CPU SRH state shared with the bpf_lwt_seg6_* helpers; bh_lock is the
 * local lock taken by input_action_end_bpf() around accesses to it
 */
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states) = {
	.bh_lock	= INIT_LOCAL_LOCK(bh_lock),
};
1389
/* Check — and, if the BPF program invalidated it, re-validate — the SRH
 * tracked in the per-CPU BPF state. Must be called with the per-CPU
 * bh_lock held (enforced by the lockdep assertion below).
 * Returns true when the SRH is present and valid.
 */
bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
{
	struct seg6_bpf_srh_state *srh_state =
		this_cpu_ptr(&seg6_bpf_srh_states);
	struct ipv6_sr_hdr *srh = srh_state->srh;

	lockdep_assert_held(&srh_state->bh_lock);
	if (unlikely(srh == NULL))
		return false;

	if (unlikely(!srh_state->valid)) {
		/* the SRH length must be a multiple of 8 bytes */
		if ((srh_state->hdrlen & 7) != 0)
			return false;

		srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
		if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
			return false;

		srh_state->valid = true;
	}

	return true;
}
1413
/* SRv6 End.BPF behavior: advance to the next segment, run the attached BPF
 * program on the packet, then either re-route it (BPF_OK) or honor the
 * program's redirect (BPF_REDIRECT). The skb is freed on error/drop.
 */
static int input_action_end_bpf(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct seg6_bpf_srh_state *srh_state;
	struct ipv6_sr_hdr *srh;
	int ret;

	srh = get_and_validate_srh(skb);
	if (!srh) {
		kfree_skb(skb);
		return -EINVAL;
	}
	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	/* The access to the per-CPU buffer srh_state is protected by running
	 * always in softirq context (with disabled BH). On PREEMPT_RT the
	 * required locking is provided by the following local_lock_nested_bh()
	 * statement. It is also accessed by the bpf_lwt_seg6_* helpers via
	 * bpf_prog_run_save_cb().
	 */
	local_lock_nested_bh(&seg6_bpf_srh_states.bh_lock);
	srh_state = this_cpu_ptr(&seg6_bpf_srh_states);
	srh_state->srh = srh;
	srh_state->hdrlen = srh->hdrlen << 3;
	srh_state->valid = true;

	rcu_read_lock();
	bpf_compute_data_pointers(skb);
	ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
	rcu_read_unlock();

	switch (ret) {
	case BPF_OK:
	case BPF_REDIRECT:
		break;
	case BPF_DROP:
		goto drop;
	default:
		pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
		goto drop;
	}

	/* the program may have rewritten the SRH: re-validate before use */
	if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
		goto drop;
	local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);

	if (ret != BPF_REDIRECT)
		seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
	kfree_skb(skb);
	return -EINVAL;
}
1470
/* Descriptor table mapping each SRv6 behavior to its input handler, its
 * required attributes (attrs), its optional attributes (optattrs) and,
 * where needed, its build/destroy callbacks and static headroom.
 */
static struct seg6_action_desc seg6_action_table[] = {
	{
		.action		= SEG6_LOCAL_ACTION_END,
		.attrs		= 0,
		.optattrs	= SEG6_F_LOCAL_COUNTERS |
				  SEG6_F_LOCAL_FLAVORS,
		.input		= input_action_end,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_X,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH6),
		.optattrs	= SEG6_F_LOCAL_COUNTERS |
				  SEG6_F_LOCAL_FLAVORS |
				  SEG6_F_ATTR(SEG6_LOCAL_OIF),
		.input		= input_action_end_x,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_T,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_TABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_t,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX2,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_OIF),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx2,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX6,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH6),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX4,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH4),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx4,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT4,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.input		= input_action_end_dt4,
		.slwt_ops	= {
					.build_state = seg6_end_dt4_build,
				  },
#endif
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT6,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.attrs		= 0,
		/* End.DT6 accepts either table (legacy) or vrftable (VRF
		 * mode); mutual exclusion is checked at build time
		 */
		.optattrs	= SEG6_F_LOCAL_COUNTERS		|
				  SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
				  SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.slwt_ops	= {
					.build_state = seg6_end_dt6_build,
				  },
#else
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_TABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#endif
		.input		= input_action_end_dt6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DT46,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.input		= input_action_end_dt46,
		.slwt_ops	= {
					.build_state = seg6_end_dt46_build,
				  },
#endif
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_B6,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_b6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_B6_ENCAP,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_b6_encap,
		.static_headroom	= sizeof(struct ipv6hdr),
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_BPF,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_BPF),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_bpf,
	},

};
1570
__get_action_desc(int action)1571 static struct seg6_action_desc *__get_action_desc(int action)
1572 {
1573 struct seg6_action_desc *desc;
1574 int i, count;
1575
1576 count = ARRAY_SIZE(seg6_action_table);
1577 for (i = 0; i < count; i++) {
1578 desc = &seg6_action_table[i];
1579 if (desc->action == action)
1580 return desc;
1581 }
1582
1583 return NULL;
1584 }
1585
/* true when the user enabled per-behavior counters at creation time */
static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt)
{
	return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS;
}
1590
seg6_local_update_counters(struct seg6_local_lwt * slwt,unsigned int len,int err)1591 static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
1592 unsigned int len, int err)
1593 {
1594 struct pcpu_seg6_local_counters *pcounters;
1595
1596 pcounters = this_cpu_ptr(slwt->pcpu_counters);
1597 u64_stats_update_begin(&pcounters->syncp);
1598
1599 if (likely(!err)) {
1600 u64_stats_inc(&pcounters->packets);
1601 u64_stats_add(&pcounters->bytes, len);
1602 } else {
1603 u64_stats_inc(&pcounters->errors);
1604 }
1605
1606 u64_stats_update_end(&pcounters->syncp);
1607 }
1608
seg6_local_input_core(struct net * net,struct sock * sk,struct sk_buff * skb)1609 static int seg6_local_input_core(struct net *net, struct sock *sk,
1610 struct sk_buff *skb)
1611 {
1612 struct dst_entry *orig_dst = skb_dst(skb);
1613 struct seg6_action_desc *desc;
1614 struct seg6_local_lwt *slwt;
1615 unsigned int len = skb->len;
1616 int rc;
1617
1618 slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
1619 desc = slwt->desc;
1620
1621 rc = desc->input(skb, slwt);
1622
1623 if (!seg6_lwtunnel_counters_enabled(slwt))
1624 return rc;
1625
1626 seg6_local_update_counters(slwt, len, rc);
1627
1628 return rc;
1629 }
1630
seg6_local_input(struct sk_buff * skb)1631 static int seg6_local_input(struct sk_buff *skb)
1632 {
1633 if (skb->protocol != htons(ETH_P_IPV6)) {
1634 kfree_skb(skb);
1635 return -EINVAL;
1636 }
1637
1638 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
1639 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
1640 dev_net(skb->dev), NULL, skb, skb->dev, NULL,
1641 seg6_local_input_core);
1642
1643 return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
1644 }
1645
/* netlink policy for the top-level SEG6_LOCAL_* attributes */
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
	[SEG6_LOCAL_ACTION]	= { .type = NLA_U32 },
	[SEG6_LOCAL_SRH]	= { .type = NLA_BINARY },
	[SEG6_LOCAL_TABLE]	= { .type = NLA_U32 },
	[SEG6_LOCAL_VRFTABLE]	= { .type = NLA_U32 },
	[SEG6_LOCAL_NH4]	= NLA_POLICY_EXACT_LEN(sizeof(struct in_addr)),
	[SEG6_LOCAL_NH6]	= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
	[SEG6_LOCAL_BPF]	= { .type = NLA_NESTED },
	[SEG6_LOCAL_COUNTERS]	= { .type = NLA_NESTED },
	[SEG6_LOCAL_FLAVORS]	= { .type = NLA_NESTED },
};
1659
/* Validate and duplicate the user supplied SRH into slwt->srh, accounting
 * its length in the required encap headroom. Returns 0 or a negative errno.
 */
static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt,
			 struct netlink_ext_ack *extack)
{
	struct ipv6_sr_hdr *srh;
	int len;

	srh = nla_data(attrs[SEG6_LOCAL_SRH]);
	len = nla_len(attrs[SEG6_LOCAL_SRH]);

	/* SRH must contain at least one segment */
	if (len < sizeof(*srh) + sizeof(struct in6_addr))
		return -EINVAL;

	if (!seg6_validate_srh(srh, len, false))
		return -EINVAL;

	slwt->srh = kmemdup(srh, len, GFP_KERNEL);
	if (!slwt->srh)
		return -ENOMEM;

	slwt->headroom += len;

	return 0;
}
1684
put_nla_srh(struct sk_buff * skb,struct seg6_local_lwt * slwt)1685 static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1686 {
1687 struct ipv6_sr_hdr *srh;
1688 struct nlattr *nla;
1689 int len;
1690
1691 srh = slwt->srh;
1692 len = (srh->hdrlen + 1) << 3;
1693
1694 nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
1695 if (!nla)
1696 return -EMSGSIZE;
1697
1698 memcpy(nla_data(nla), srh, len);
1699
1700 return 0;
1701 }
1702
cmp_nla_srh(struct seg6_local_lwt * a,struct seg6_local_lwt * b)1703 static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1704 {
1705 int len = (a->srh->hdrlen + 1) << 3;
1706
1707 if (len != ((b->srh->hdrlen + 1) << 3))
1708 return 1;
1709
1710 return memcmp(a->srh, b->srh, len);
1711 }
1712
/* release the SRH copy allocated by parse_nla_srh() */
static void destroy_attr_srh(struct seg6_local_lwt *slwt)
{
	kfree(slwt->srh);
}
1717
/* store the routing table id supplied via SEG6_LOCAL_TABLE */
static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt,
			   struct netlink_ext_ack *extack)
{
	slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);

	return 0;
}
1725
/* dump the configured routing table id back to userspace */
static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
		return -EMSGSIZE;

	return 0;
}
1733
cmp_nla_table(struct seg6_local_lwt * a,struct seg6_local_lwt * b)1734 static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1735 {
1736 if (a->table != b->table)
1737 return 1;
1738
1739 return 0;
1740 }
1741
/* Return the End.DT* info embedded in the behavior state, or
 * ERR_PTR(-EOPNOTSUPP) when the kernel lacks L3 master device support.
 */
static struct
seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
{
#ifdef CONFIG_NET_L3_MASTER_DEV
	return &slwt->dt_info;
#else
	return ERR_PTR(-EOPNOTSUPP);
#endif
}
1751
/* Store the VRF table id supplied via SEG6_LOCAL_VRFTABLE; fails with
 * -EOPNOTSUPP when CONFIG_NET_L3_MASTER_DEV is not set.
 */
static int parse_nla_vrftable(struct nlattr **attrs,
			      struct seg6_local_lwt *slwt,
			      struct netlink_ext_ack *extack)
{
	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);

	if (IS_ERR(info))
		return PTR_ERR(info);

	info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);

	return 0;
}
1765
/* dump the configured VRF table id back to userspace */
static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);

	if (IS_ERR(info))
		return PTR_ERR(info);

	if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
		return -EMSGSIZE;

	return 0;
}
1778
cmp_nla_vrftable(struct seg6_local_lwt * a,struct seg6_local_lwt * b)1779 static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1780 {
1781 struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
1782 struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);
1783
1784 if (info_a->vrf_table != info_b->vrf_table)
1785 return 1;
1786
1787 return 0;
1788 }
1789
/* copy the IPv4 next-hop address supplied via SEG6_LOCAL_NH4 */
static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt,
			 struct netlink_ext_ack *extack)
{
	memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
	       sizeof(struct in_addr));

	return 0;
}
1798
/* dump the configured IPv4 next hop back to userspace */
static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct nlattr *nla;

	nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
	if (!nla)
		return -EMSGSIZE;

	memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));

	return 0;
}
1811
/* compare the IPv4 next-hop attribute of two behaviors; 0 means equal */
static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
}
1816
/* copy the IPv6 next-hop address supplied via SEG6_LOCAL_NH6 */
static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt,
			 struct netlink_ext_ack *extack)
{
	memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
	       sizeof(struct in6_addr));

	return 0;
}
1825
put_nla_nh6(struct sk_buff * skb,struct seg6_local_lwt * slwt)1826 static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
1827 {
1828 struct nlattr *nla;
1829
1830 nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
1831 if (!nla)
1832 return -EMSGSIZE;
1833
1834 memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
1835
1836 return 0;
1837 }
1838
/* compare the IPv6 next-hop attribute of two behaviors; 0 means equal */
static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
}
1843
/* store the input interface index supplied via SEG6_LOCAL_IIF */
static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
			 struct netlink_ext_ack *extack)
{
	slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);

	return 0;
}
1851
/* dump the configured input interface index back to userspace */
static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
		return -EMSGSIZE;

	return 0;
}
1859
cmp_nla_iif(struct seg6_local_lwt * a,struct seg6_local_lwt * b)1860 static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1861 {
1862 if (a->iif != b->iif)
1863 return 1;
1864
1865 return 0;
1866 }
1867
/* store the output interface index supplied via SEG6_LOCAL_OIF */
static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
			 struct netlink_ext_ack *extack)
{
	slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);

	return 0;
}
1875
/* dump the configured output interface index back to userspace */
static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
		return -EMSGSIZE;

	return 0;
}
1883
cmp_nla_oif(struct seg6_local_lwt * a,struct seg6_local_lwt * b)1884 static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1885 {
1886 if (a->oif != b->oif)
1887 return 1;
1888
1889 return 0;
1890 }
1891
/* maximum length accepted for a BPF program name attribute */
#define MAX_PROG_NAME 256
/* netlink policy for the nested SEG6_LOCAL_BPF attribute */
static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
	[SEG6_LOCAL_BPF_PROG]	   = { .type = NLA_U32, },
	[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
				       .len = MAX_PROG_NAME },
};
1898
/* Parse the nested SEG6_LOCAL_BPF attribute: both the program fd and its
 * name are mandatory. On success a reference is taken on the BPF program
 * and the name is duplicated; both are released by destroy_attr_bpf().
 */
static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
	struct bpf_prog *p;
	int ret;
	u32 fd;

	ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
					  attrs[SEG6_LOCAL_BPF],
					  bpf_prog_policy, NULL);
	if (ret < 0)
		return ret;

	if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
		return -EINVAL;

	slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
	if (!slwt->bpf.name)
		return -ENOMEM;

	fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
	p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
	if (IS_ERR(p)) {
		/* undo the name allocation before reporting the failure */
		kfree(slwt->bpf.name);
		return PTR_ERR(p);
	}

	slwt->bpf.prog = p;
	return 0;
}
1930
/* Dump the attached BPF program (id and, when available, name) as a nested
 * SEG6_LOCAL_BPF attribute; a no-op when no program is attached.
 * NOTE(review): on -EMSGSIZE the half-built nest is not cancelled here;
 * presumably the caller discards the whole message — confirm.
 */
static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct nlattr *nest;

	if (!slwt->bpf.prog)
		return 0;

	nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
		return -EMSGSIZE;

	if (slwt->bpf.name &&
	    nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
		return -EMSGSIZE;

	return nla_nest_end(skb, nest);
}
1951
cmp_nla_bpf(struct seg6_local_lwt * a,struct seg6_local_lwt * b)1952 static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
1953 {
1954 if (!a->bpf.name && !b->bpf.name)
1955 return 0;
1956
1957 if (!a->bpf.name || !b->bpf.name)
1958 return 1;
1959
1960 return strcmp(a->bpf.name, b->bpf.name);
1961 }
1962
/* release the name copy and the program reference taken by parse_nla_bpf() */
static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
{
	kfree(slwt->bpf.name);
	if (slwt->bpf.prog)
		bpf_prog_put(slwt->bpf.prog);
}
1969
/* netlink policy for the nested SEG6_LOCAL_COUNTERS attribute */
static const struct
nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = {
	[SEG6_LOCAL_CNT_PACKETS]	= { .type = NLA_U64 },
	[SEG6_LOCAL_CNT_BYTES]		= { .type = NLA_U64 },
	[SEG6_LOCAL_CNT_ERRORS]		= { .type = NLA_U64 },
};
1976
/* Parse the nested SEG6_LOCAL_COUNTERS attribute and allocate the per-CPU
 * counters for the behavior; freed by destroy_attr_counters().
 */
static int parse_nla_counters(struct nlattr **attrs,
			      struct seg6_local_lwt *slwt,
			      struct netlink_ext_ack *extack)
{
	struct pcpu_seg6_local_counters __percpu *pcounters;
	struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1];
	int ret;

	ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX,
					  attrs[SEG6_LOCAL_COUNTERS],
					  seg6_local_counters_policy, NULL);
	if (ret < 0)
		return ret;

	/* basic support for SRv6 Behavior counters requires at least:
	 * packets, bytes and errors.
	 */
	if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] ||
	    !tb[SEG6_LOCAL_CNT_ERRORS])
		return -EINVAL;

	/* counters are always zero initialized */
	pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL);
	if (!pcounters)
		return -ENOMEM;

	slwt->pcpu_counters = pcounters;

	return 0;
}
2007
seg6_local_fill_nla_counters(struct sk_buff * skb,struct seg6_local_counters * counters)2008 static int seg6_local_fill_nla_counters(struct sk_buff *skb,
2009 struct seg6_local_counters *counters)
2010 {
2011 if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets,
2012 SEG6_LOCAL_CNT_PAD))
2013 return -EMSGSIZE;
2014
2015 if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes,
2016 SEG6_LOCAL_CNT_PAD))
2017 return -EMSGSIZE;
2018
2019 if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors,
2020 SEG6_LOCAL_CNT_PAD))
2021 return -EMSGSIZE;
2022
2023 return 0;
2024 }
2025
/* Aggregate the per-CPU counters (with a u64_stats snapshot loop on each
 * CPU) and dump the totals as a nested SEG6_LOCAL_COUNTERS attribute.
 */
static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct seg6_local_counters counters = { 0, 0, 0 };
	struct nlattr *nest;
	int rc, i;

	nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS);
	if (!nest)
		return -EMSGSIZE;

	for_each_possible_cpu(i) {
		struct pcpu_seg6_local_counters *pcounters;
		u64 packets, bytes, errors;
		unsigned int start;

		pcounters = per_cpu_ptr(slwt->pcpu_counters, i);
		do {
			/* retry until a consistent snapshot is read */
			start = u64_stats_fetch_begin(&pcounters->syncp);

			packets = u64_stats_read(&pcounters->packets);
			bytes = u64_stats_read(&pcounters->bytes);
			errors = u64_stats_read(&pcounters->errors);

		} while (u64_stats_fetch_retry(&pcounters->syncp, start));

		counters.packets += packets;
		counters.bytes += bytes;
		counters.errors += errors;
	}

	rc = seg6_local_fill_nla_counters(skb, &counters);
	if (rc < 0) {
		nla_nest_cancel(skb, nest);
		return rc;
	}

	return nla_nest_end(skb, nest);
}
2064
cmp_nla_counters(struct seg6_local_lwt * a,struct seg6_local_lwt * b)2065 static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
2066 {
2067 /* a and b are equal if both have pcpu_counters set or not */
2068 return (!!((unsigned long)a->pcpu_counters)) ^
2069 (!!((unsigned long)b->pcpu_counters));
2070 }
2071
/* Release the per-CPU counters acquired by parse_nla_counters();
 * free_percpu(NULL) is a no-op, so this is safe even when the optional
 * counters attribute was never parsed.
 */
static void destroy_attr_counters(struct seg6_local_lwt *slwt)
{
	free_percpu(slwt->pcpu_counters);
}
2076
/* netlink policy for the attributes nested inside SEG6_LOCAL_FLAVORS */
static const
struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = {
	[SEG6_LOCAL_FLV_OPERATION]	= { .type = NLA_U32 },
	[SEG6_LOCAL_FLV_LCBLOCK_BITS]	= { .type = NLA_U8 },
	[SEG6_LOCAL_FLV_LCNODE_FN_BITS]	= { .type = NLA_U8 },
};
2083
2084 /* check whether the lengths of the Locator-Block and Locator-Node Function
2085 * are compatible with the dimension of a C-SID container.
2086 */
seg6_chk_next_csid_cfg(__u8 block_len,__u8 func_len)2087 static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len)
2088 {
2089 /* Locator-Block and Locator-Node Function cannot exceed 128 bits
2090 * (i.e. C-SID container length).
2091 */
2092 if (next_csid_chk_cntr_bits(block_len, func_len))
2093 return -EINVAL;
2094
2095 /* Locator-Block length must be greater than zero and evenly divisible
2096 * by 8. There must be room for a Locator-Node Function, at least.
2097 */
2098 if (next_csid_chk_lcblock_bits(block_len))
2099 return -EINVAL;
2100
2101 /* Locator-Node Function length must be greater than zero and evenly
2102 * divisible by 8. There must be room for the Locator-Block.
2103 */
2104 if (next_csid_chk_lcnode_fn_bits(func_len))
2105 return -EINVAL;
2106
2107 return 0;
2108 }
2109
seg6_parse_nla_next_csid_cfg(struct nlattr ** tb,struct seg6_flavors_info * finfo,struct netlink_ext_ack * extack)2110 static int seg6_parse_nla_next_csid_cfg(struct nlattr **tb,
2111 struct seg6_flavors_info *finfo,
2112 struct netlink_ext_ack *extack)
2113 {
2114 __u8 func_len = SEG6_LOCAL_LCNODE_FN_DBITS;
2115 __u8 block_len = SEG6_LOCAL_LCBLOCK_DBITS;
2116 int rc;
2117
2118 if (tb[SEG6_LOCAL_FLV_LCBLOCK_BITS])
2119 block_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCBLOCK_BITS]);
2120
2121 if (tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS])
2122 func_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS]);
2123
2124 rc = seg6_chk_next_csid_cfg(block_len, func_len);
2125 if (rc < 0) {
2126 NL_SET_ERR_MSG(extack,
2127 "Invalid Locator Block/Node Function lengths");
2128 return rc;
2129 }
2130
2131 finfo->lcblock_bits = block_len;
2132 finfo->lcnode_func_bits = func_len;
2133
2134 return 0;
2135 }
2136
parse_nla_flavors(struct nlattr ** attrs,struct seg6_local_lwt * slwt,struct netlink_ext_ack * extack)2137 static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
2138 struct netlink_ext_ack *extack)
2139 {
2140 struct seg6_flavors_info *finfo = &slwt->flv_info;
2141 struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1];
2142 int action = slwt->action;
2143 __u32 fops, supp_fops;
2144 int rc;
2145
2146 rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX,
2147 attrs[SEG6_LOCAL_FLAVORS],
2148 seg6_local_flavors_policy, NULL);
2149 if (rc < 0)
2150 return rc;
2151
2152 /* this attribute MUST always be present since it represents the Flavor
2153 * operation(s) to be carried out.
2154 */
2155 if (!tb[SEG6_LOCAL_FLV_OPERATION])
2156 return -EINVAL;
2157
2158 fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]);
2159 rc = seg6_flv_supp_ops_by_action(action, &supp_fops);
2160 if (rc < 0 || (fops & ~supp_fops)) {
2161 NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)");
2162 return -EOPNOTSUPP;
2163 }
2164
2165 finfo->flv_ops = fops;
2166
2167 if (seg6_next_csid_enabled(fops)) {
2168 /* Locator-Block and Locator-Node Function lengths can be
2169 * provided by the user space. Otherwise, default values are
2170 * applied.
2171 */
2172 rc = seg6_parse_nla_next_csid_cfg(tb, finfo, extack);
2173 if (rc < 0)
2174 return rc;
2175 }
2176
2177 return 0;
2178 }
2179
seg6_fill_nla_next_csid_cfg(struct sk_buff * skb,struct seg6_flavors_info * finfo)2180 static int seg6_fill_nla_next_csid_cfg(struct sk_buff *skb,
2181 struct seg6_flavors_info *finfo)
2182 {
2183 if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCBLOCK_BITS, finfo->lcblock_bits))
2184 return -EMSGSIZE;
2185
2186 if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCNODE_FN_BITS,
2187 finfo->lcnode_func_bits))
2188 return -EMSGSIZE;
2189
2190 return 0;
2191 }
2192
/* Dump the flavor configuration as a nested SEG6_LOCAL_FLAVORS attribute.
 * The NEXT-C-SID lengths are emitted only when that flavor is enabled.
 *
 * Returns the value of nla_nest_end() on success; on failure the nest is
 * canceled and a negative errno is returned.
 */
static int put_nla_flavors(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct seg6_flavors_info *finfo = &slwt->flv_info;
	__u32 fops = finfo->flv_ops;
	struct nlattr *nest;
	int rc;

	nest = nla_nest_start(skb, SEG6_LOCAL_FLAVORS);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(skb, SEG6_LOCAL_FLV_OPERATION, fops)) {
		rc = -EMSGSIZE;
		goto err;
	}

	if (seg6_next_csid_enabled(fops)) {
		rc = seg6_fill_nla_next_csid_cfg(skb, finfo);
		if (rc < 0)
			goto err;
	}

	return nla_nest_end(skb, nest);

err:
	nla_nest_cancel(skb, nest);
	return rc;
}
2221
seg6_cmp_nla_next_csid_cfg(struct seg6_flavors_info * finfo_a,struct seg6_flavors_info * finfo_b)2222 static int seg6_cmp_nla_next_csid_cfg(struct seg6_flavors_info *finfo_a,
2223 struct seg6_flavors_info *finfo_b)
2224 {
2225 if (finfo_a->lcblock_bits != finfo_b->lcblock_bits)
2226 return 1;
2227
2228 if (finfo_a->lcnode_func_bits != finfo_b->lcnode_func_bits)
2229 return 1;
2230
2231 return 0;
2232 }
2233
cmp_nla_flavors(struct seg6_local_lwt * a,struct seg6_local_lwt * b)2234 static int cmp_nla_flavors(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
2235 {
2236 struct seg6_flavors_info *finfo_a = &a->flv_info;
2237 struct seg6_flavors_info *finfo_b = &b->flv_info;
2238
2239 if (finfo_a->flv_ops != finfo_b->flv_ops)
2240 return 1;
2241
2242 if (seg6_next_csid_enabled(finfo_a->flv_ops)) {
2243 if (seg6_cmp_nla_next_csid_cfg(finfo_a, finfo_b))
2244 return 1;
2245 }
2246
2247 return 0;
2248 }
2249
encap_size_flavors(struct seg6_local_lwt * slwt)2250 static int encap_size_flavors(struct seg6_local_lwt *slwt)
2251 {
2252 struct seg6_flavors_info *finfo = &slwt->flv_info;
2253 int nlsize;
2254
2255 nlsize = nla_total_size(0) + /* nest SEG6_LOCAL_FLAVORS */
2256 nla_total_size(4); /* SEG6_LOCAL_FLV_OPERATION */
2257
2258 if (seg6_next_csid_enabled(finfo->flv_ops))
2259 nlsize += nla_total_size(1) + /* SEG6_LOCAL_FLV_LCBLOCK_BITS */
2260 nla_total_size(1); /* SEG6_LOCAL_FLV_LCNODE_FN_BITS */
2261
2262 return nlsize;
2263 }
2264
/* per-attribute operations; one instance per seg6local netlink attribute */
struct seg6_action_param {
	/* parse() fills the behavior state from the netlink attributes */
	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt,
		     struct netlink_ext_ack *extack);
	/* put() dumps the attribute back into a netlink message */
	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	/* cmp() returns 0 when the attribute matches in @a and @b */
	int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);

	/* optional destroy() callback useful for releasing resources which
	 * have been previously acquired in the corresponding parse()
	 * function.
	 */
	void (*destroy)(struct seg6_local_lwt *slwt);
};
2277
/* dispatch table of parse/put/cmp/destroy callbacks, indexed by the
 * seg6local attribute ID (SEG6_LOCAL_*). Only attributes that acquire
 * resources in parse() provide a destroy() callback.
 */
static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
	[SEG6_LOCAL_SRH]	= { .parse = parse_nla_srh,
				    .put = put_nla_srh,
				    .cmp = cmp_nla_srh,
				    .destroy = destroy_attr_srh },

	[SEG6_LOCAL_TABLE]	= { .parse = parse_nla_table,
				    .put = put_nla_table,
				    .cmp = cmp_nla_table },

	[SEG6_LOCAL_NH4]	= { .parse = parse_nla_nh4,
				    .put = put_nla_nh4,
				    .cmp = cmp_nla_nh4 },

	[SEG6_LOCAL_NH6]	= { .parse = parse_nla_nh6,
				    .put = put_nla_nh6,
				    .cmp = cmp_nla_nh6 },

	[SEG6_LOCAL_IIF]	= { .parse = parse_nla_iif,
				    .put = put_nla_iif,
				    .cmp = cmp_nla_iif },

	[SEG6_LOCAL_OIF]	= { .parse = parse_nla_oif,
				    .put = put_nla_oif,
				    .cmp = cmp_nla_oif },

	[SEG6_LOCAL_BPF]	= { .parse = parse_nla_bpf,
				    .put = put_nla_bpf,
				    .cmp = cmp_nla_bpf,
				    .destroy = destroy_attr_bpf },

	[SEG6_LOCAL_VRFTABLE]	= { .parse = parse_nla_vrftable,
				    .put = put_nla_vrftable,
				    .cmp = cmp_nla_vrftable },

	[SEG6_LOCAL_COUNTERS]	= { .parse = parse_nla_counters,
				    .put = put_nla_counters,
				    .cmp = cmp_nla_counters,
				    .destroy = destroy_attr_counters },

	[SEG6_LOCAL_FLAVORS]	= { .parse = parse_nla_flavors,
				    .put = put_nla_flavors,
				    .cmp = cmp_nla_flavors },
};
2322
2323 /* call the destroy() callback (if available) for each set attribute in
2324 * @parsed_attrs, starting from the first attribute up to the @max_parsed
2325 * (excluded) attribute.
2326 */
__destroy_attrs(unsigned long parsed_attrs,int max_parsed,struct seg6_local_lwt * slwt)2327 static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
2328 struct seg6_local_lwt *slwt)
2329 {
2330 struct seg6_action_param *param;
2331 int i;
2332
2333 /* Every required seg6local attribute is identified by an ID which is
2334 * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
2335 *
2336 * We scan the 'parsed_attrs' bitmask, starting from the first attribute
2337 * up to the @max_parsed (excluded) attribute.
2338 * For each set attribute, we retrieve the corresponding destroy()
2339 * callback. If the callback is not available, then we skip to the next
2340 * attribute; otherwise, we call the destroy() callback.
2341 */
2342 for (i = SEG6_LOCAL_SRH; i < max_parsed; ++i) {
2343 if (!(parsed_attrs & SEG6_F_ATTR(i)))
2344 continue;
2345
2346 param = &seg6_action_params[i];
2347
2348 if (param->destroy)
2349 param->destroy(slwt);
2350 }
2351 }
2352
/* release all the resources that may have been acquired during parsing
 * operations.
 */
static void destroy_attrs(struct seg6_local_lwt *slwt)
{
	/* both the required attributes and the optional ones that were
	 * actually parsed for this behavior
	 */
	unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;

	__destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
}
2362
/* Parse every optional attribute supported by the behavior that was also
 * supplied by userspace. On failure, the attributes parsed so far (those
 * strictly below loop index i) are destroyed before returning the error.
 */
static int parse_nla_optional_attrs(struct nlattr **attrs,
				    struct seg6_local_lwt *slwt,
				    struct netlink_ext_ack *extack)
{
	struct seg6_action_desc *desc = slwt->desc;
	unsigned long parsed_optattrs = 0;
	struct seg6_action_param *param;
	int err, i;

	for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; ++i) {
		if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i])
			continue;

		/* once here, the i-th attribute is provided by the
		 * userspace AND it is identified optional as well.
		 */
		param = &seg6_action_params[i];

		err = param->parse(attrs, slwt, extack);
		if (err < 0)
			goto parse_optattrs_err;

		/* current attribute has been correctly parsed */
		parsed_optattrs |= SEG6_F_ATTR(i);
	}

	/* store in the tunnel state all the optional attributed successfully
	 * parsed.
	 */
	slwt->parsed_optattrs = parsed_optattrs;

	return 0;

parse_optattrs_err:
	/* unwind only what this loop acquired, up to (excluding) index i */
	__destroy_attrs(parsed_optattrs, i, slwt);

	return err;
}
2401
2402 /* call the custom constructor of the behavior during its initialization phase
2403 * and after that all its attributes have been parsed successfully.
2404 */
2405 static int
seg6_local_lwtunnel_build_state(struct seg6_local_lwt * slwt,const void * cfg,struct netlink_ext_ack * extack)2406 seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
2407 struct netlink_ext_ack *extack)
2408 {
2409 struct seg6_action_desc *desc = slwt->desc;
2410 struct seg6_local_lwtunnel_ops *ops;
2411
2412 ops = &desc->slwt_ops;
2413 if (!ops->build_state)
2414 return 0;
2415
2416 return ops->build_state(slwt, cfg, extack);
2417 }
2418
2419 /* call the custom destructor of the behavior which is invoked before the
2420 * tunnel is going to be destroyed.
2421 */
seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt * slwt)2422 static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
2423 {
2424 struct seg6_action_desc *desc = slwt->desc;
2425 struct seg6_local_lwtunnel_ops *ops;
2426
2427 ops = &desc->slwt_ops;
2428 if (!ops->destroy_state)
2429 return;
2430
2431 ops->destroy_state(slwt);
2432 }
2433
/* Resolve the action descriptor for @slwt->action, then parse all of the
 * behavior's required attributes followed by its optional ones. On
 * failure, any resource acquired by attributes parsed so far is released
 * before the error is returned.
 */
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt,
			    struct netlink_ext_ack *extack)
{
	struct seg6_action_param *param;
	struct seg6_action_desc *desc;
	unsigned long invalid_attrs;
	int i, err;

	desc = __get_action_desc(slwt->action);
	if (!desc)
		return -EINVAL;

	if (!desc->input)
		return -EOPNOTSUPP;

	slwt->desc = desc;
	slwt->headroom += desc->static_headroom;

	/* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
	 * disjoined, this allow us to release acquired resources by optional
	 * attributes and by required attributes independently from each other
	 * without any interference.
	 * In other terms, we are sure that we do not release some the acquired
	 * resources twice.
	 *
	 * Note that if an attribute is configured both as required and as
	 * optional, it means that the user has messed something up in the
	 * seg6_action_table. Therefore, this check is required for SRv6
	 * behaviors to work properly.
	 */
	invalid_attrs = desc->attrs & desc->optattrs;
	if (invalid_attrs) {
		WARN_ONCE(1,
			  "An attribute cannot be both required AND optional");
		return -EINVAL;
	}

	/* parse the required attributes */
	for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
		if (desc->attrs & SEG6_F_ATTR(i)) {
			if (!attrs[i])
				return -EINVAL;

			param = &seg6_action_params[i];

			err = param->parse(attrs, slwt, extack);
			if (err < 0)
				goto parse_attrs_err;
		}
	}

	/* parse the optional attributes, if any */
	err = parse_nla_optional_attrs(attrs, slwt, extack);
	if (err < 0)
		goto parse_attrs_err;

	return 0;

parse_attrs_err:
	/* release any resource that may have been acquired during the i-1
	 * parse() operations.
	 */
	__destroy_attrs(desc->attrs, i, slwt);

	return err;
}
2500
/* lwtunnel build_state() callback: allocate and configure a seg6local
 * tunnel state from the SEG6_LOCAL_* netlink attributes in @nla.
 *
 * Only AF_INET6 routes are accepted. On success *@ts points to the new
 * state; on failure the partially built state is torn down and a negative
 * errno is returned.
 */
static int seg6_local_build_state(struct net *net, struct nlattr *nla,
				  unsigned int family, const void *cfg,
				  struct lwtunnel_state **ts,
				  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[SEG6_LOCAL_MAX + 1];
	struct lwtunnel_state *newts;
	struct seg6_local_lwt *slwt;
	int err;

	if (family != AF_INET6)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla,
					  seg6_local_policy, extack);

	if (err < 0)
		return err;

	/* the action attribute is mandatory */
	if (!tb[SEG6_LOCAL_ACTION])
		return -EINVAL;

	newts = lwtunnel_state_alloc(sizeof(*slwt));
	if (!newts)
		return -ENOMEM;

	slwt = seg6_local_lwtunnel(newts);
	slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);

	err = parse_nla_action(tb, slwt, extack);
	if (err < 0)
		goto out_free;

	err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
	if (err < 0)
		goto out_destroy_attrs;

	newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
	newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
	newts->headroom = slwt->headroom;

	*ts = newts;

	return 0;

out_destroy_attrs:
	/* parse_nla_action() succeeded, so its attributes must be undone */
	destroy_attrs(slwt);
out_free:
	kfree(newts);
	return err;
}
2552
/* lwtunnel destroy_state() callback: run the behavior's custom destructor
 * first, then release the resources bound to the parsed (required and
 * optional) attributes.
 */
static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
{
	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);

	seg6_local_lwtunnel_destroy_state(slwt);

	destroy_attrs(slwt);
}
2563
seg6_local_fill_encap(struct sk_buff * skb,struct lwtunnel_state * lwt)2564 static int seg6_local_fill_encap(struct sk_buff *skb,
2565 struct lwtunnel_state *lwt)
2566 {
2567 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
2568 struct seg6_action_param *param;
2569 unsigned long attrs;
2570 int i, err;
2571
2572 if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
2573 return -EMSGSIZE;
2574
2575 attrs = slwt->desc->attrs | slwt->parsed_optattrs;
2576
2577 for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2578 if (attrs & SEG6_F_ATTR(i)) {
2579 param = &seg6_action_params[i];
2580 err = param->put(skb, slwt);
2581 if (err < 0)
2582 return err;
2583 }
2584 }
2585
2586 return 0;
2587 }
2588
/* lwtunnel get_encap_size() callback: upper bound of the netlink payload
 * needed by seg6_local_fill_encap() for this behavior's configuration.
 */
static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
{
	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
	unsigned long attrs;
	int nlsize;

	nlsize = nla_total_size(4); /* action */

	/* account for every attribute that fill_encap() will emit */
	attrs = slwt->desc->attrs | slwt->parsed_optattrs;

	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH))
		nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);

	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE))
		nlsize += nla_total_size(4);

	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4))
		nlsize += nla_total_size(4);

	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6))
		nlsize += nla_total_size(16);

	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_IIF))
		nlsize += nla_total_size(4);

	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF))
		nlsize += nla_total_size(4);

	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF))
		nlsize += nla_total_size(sizeof(struct nlattr)) +
		       nla_total_size(MAX_PROG_NAME) +
		       nla_total_size(4);

	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE))
		nlsize += nla_total_size(4);

	if (attrs & SEG6_F_LOCAL_COUNTERS)
		nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */
			  /* SEG6_LOCAL_CNT_PACKETS */
			  nla_total_size_64bit(sizeof(__u64)) +
			  /* SEG6_LOCAL_CNT_BYTES */
			  nla_total_size_64bit(sizeof(__u64)) +
			  /* SEG6_LOCAL_CNT_ERRORS */
			  nla_total_size_64bit(sizeof(__u64));

	if (attrs & SEG6_F_ATTR(SEG6_LOCAL_FLAVORS))
		nlsize += encap_size_flavors(slwt);

	return nlsize;
}
2639
seg6_local_cmp_encap(struct lwtunnel_state * a,struct lwtunnel_state * b)2640 static int seg6_local_cmp_encap(struct lwtunnel_state *a,
2641 struct lwtunnel_state *b)
2642 {
2643 struct seg6_local_lwt *slwt_a, *slwt_b;
2644 struct seg6_action_param *param;
2645 unsigned long attrs_a, attrs_b;
2646 int i;
2647
2648 slwt_a = seg6_local_lwtunnel(a);
2649 slwt_b = seg6_local_lwtunnel(b);
2650
2651 if (slwt_a->action != slwt_b->action)
2652 return 1;
2653
2654 attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
2655 attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;
2656
2657 if (attrs_a != attrs_b)
2658 return 1;
2659
2660 for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
2661 if (attrs_a & SEG6_F_ATTR(i)) {
2662 param = &seg6_action_params[i];
2663 if (param->cmp(slwt_a, slwt_b))
2664 return 1;
2665 }
2666 }
2667
2668 return 0;
2669 }
2670
/* encap operations registered for LWTUNNEL_ENCAP_SEG6_LOCAL */
static const struct lwtunnel_encap_ops seg6_local_ops = {
	.build_state	= seg6_local_build_state,
	.destroy_state	= seg6_local_destroy_state,
	.input		= seg6_local_input,
	.fill_encap	= seg6_local_fill_encap,
	.get_encap_size	= seg6_local_get_encap_size,
	.cmp_encap	= seg6_local_cmp_encap,
	.owner		= THIS_MODULE,
};
2680
/* module init: validate compile-time invariants, then register the
 * seg6local lwtunnel encap operations.
 */
int __init seg6_local_init(void)
{
	/* If the max total number of defined attributes is reached, then your
	 * kernel build stops here.
	 *
	 * This check is required to avoid arithmetic overflows when processing
	 * behavior attributes and the maximum number of defined attributes
	 * exceeds the allowed value.
	 */
	BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));

	/* Check whether the number of defined flavors exceeds the maximum
	 * allowed value.
	 */
	BUILD_BUG_ON(SEG6_LOCAL_FLV_OP_MAX + 1 > BITS_PER_TYPE(__u32));

	/* If the default NEXT-C-SID Locator-Block/Node Function lengths (in
	 * bits) have been changed with invalid values, kernel build stops
	 * here.
	 */
	BUILD_BUG_ON(next_csid_chk_cntr_bits(SEG6_LOCAL_LCBLOCK_DBITS,
					     SEG6_LOCAL_LCNODE_FN_DBITS));
	BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS));
	BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS));

	/* To be memory efficient, we use 'u8' to represent the different
	 * actions related to RFC8986 flavors. If the kernel build stops here,
	 * it means that it is not possible to correctly encode these actions
	 * with the data type chosen for the action table.
	 */
	BUILD_BUG_ON(SEG6_LOCAL_FLV_ACT_MAX > (typeof(flv8986_act_tbl[0]))~0U);

	return lwtunnel_encap_add_ops(&seg6_local_ops,
				      LWTUNNEL_ENCAP_SEG6_LOCAL);
}
2716
/* module exit: unregister the seg6local lwtunnel encap operations */
void seg6_local_exit(void)
{
	lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
}
2721