1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * SR-IPv6 implementation
4 *
5 * Author:
6 * David Lebrun <david.lebrun@uclouvain.be>
7 */
8
9 #include <linux/types.h>
10 #include <linux/skbuff.h>
11 #include <linux/net.h>
12 #include <linux/module.h>
13 #include <net/ip.h>
14 #include <net/ip_tunnels.h>
15 #include <net/lwtunnel.h>
16 #include <net/netevent.h>
17 #include <net/netns/generic.h>
18 #include <net/ip6_fib.h>
19 #include <net/route.h>
20 #include <net/seg6.h>
21 #include <linux/seg6.h>
22 #include <linux/seg6_iptunnel.h>
23 #include <net/addrconf.h>
24 #include <net/ip6_route.h>
25 #include <net/dst_cache.h>
26 #ifdef CONFIG_IPV6_SEG6_HMAC
27 #include <net/seg6_hmac.h>
28 #endif
29 #include <linux/netfilter.h>
30
seg6_lwt_headroom(struct seg6_iptunnel_encap * tuninfo)31 static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
32 {
33 int head = 0;
34
35 switch (tuninfo->mode) {
36 case SEG6_IPTUN_MODE_INLINE:
37 break;
38 case SEG6_IPTUN_MODE_ENCAP:
39 case SEG6_IPTUN_MODE_ENCAP_RED:
40 head = sizeof(struct ipv6hdr);
41 break;
42 case SEG6_IPTUN_MODE_L2ENCAP:
43 case SEG6_IPTUN_MODE_L2ENCAP_RED:
44 return 0;
45 }
46
47 return ((tuninfo->srh->hdrlen + 1) << 3) + head;
48 }
49
/* Per-route state for a seg6 lwtunnel (embedded in lwtunnel_state::data) */
struct seg6_lwt {
	struct dst_cache cache_input;	/* cached dst for the input path */
	struct dst_cache cache_output;	/* cached dst for the output path */
	struct in6_addr tunsrc;		/* per-route tunnel source (SEG6_IPTUNNEL_SRC) */
	struct seg6_iptunnel_encap tuninfo[];	/* mode + SRH, variable length */
};
56
/* Return the seg6 private area embedded in a lwtunnel state */
static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
{
	return (struct seg6_lwt *)lwt->data;
}
61
/* Return the encap info (mode + SRH) stored in a lwtunnel state */
static inline struct seg6_iptunnel_encap *
seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
{
	return seg6_lwt_lwtunnel(lwt)->tuninfo;
}
67
/* Netlink policy for SEG6_IPTUNNEL_* attributes; the SRH payload is
 * variable-length binary and validated later by seg6_validate_srh().
 */
static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
	[SEG6_IPTUNNEL_SRH]	= { .type = NLA_BINARY },
	[SEG6_IPTUNNEL_SRC]	= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
};
72
/* Dump the encap info (mode + SRH) as a netlink attribute.
 *
 * Returns 0 on success or -EMSGSIZE if the skb has no room left.
 */
static int nla_put_srh(struct sk_buff *skb, int attrtype,
		       struct seg6_iptunnel_encap *tuninfo)
{
	int len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);

	/* nla_put() reserves the attribute and copies the payload in one
	 * step, replacing the open-coded nla_reserve() + memcpy() pair.
	 */
	return nla_put(skb, attrtype, len, tuninfo);
}
91
set_tun_src(struct net * net,struct net_device * dev,struct in6_addr * daddr,struct in6_addr * saddr,struct in6_addr * route_tunsrc)92 static void set_tun_src(struct net *net, struct net_device *dev,
93 struct in6_addr *daddr, struct in6_addr *saddr,
94 struct in6_addr *route_tunsrc)
95 {
96 struct seg6_pernet_data *sdata = seg6_pernet(net);
97 struct in6_addr *tun_src;
98
99 /* Priority order to select tunnel source address:
100 * 1. per route source address (if configured)
101 * 2. per network namespace source address (if configured)
102 * 3. dynamic resolution
103 */
104 if (route_tunsrc && !ipv6_addr_any(route_tunsrc)) {
105 memcpy(saddr, route_tunsrc, sizeof(struct in6_addr));
106 } else {
107 rcu_read_lock();
108 tun_src = rcu_dereference(sdata->tun_src);
109
110 if (!ipv6_addr_any(tun_src)) {
111 memcpy(saddr, tun_src, sizeof(struct in6_addr));
112 } else {
113 ipv6_dev_get_saddr(net, dev, daddr,
114 IPV6_PREFER_SRC_PUBLIC, saddr);
115 }
116
117 rcu_read_unlock();
118 }
119 }
120
121 /* Compute flowlabel for outer IPv6 header */
seg6_make_flowlabel(struct net * net,struct sk_buff * skb,struct ipv6hdr * inner_hdr)122 static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
123 struct ipv6hdr *inner_hdr)
124 {
125 int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
126 __be32 flowlabel = 0;
127 u32 hash;
128
129 if (do_flowlabel > 0) {
130 hash = skb_get_hash(skb);
131 hash = rol32(hash, 16);
132 flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
133 } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
134 flowlabel = ip6_flowlabel(inner_hdr);
135 }
136 return flowlabel;
137 }
138
/* Encapsulate the packet in an outer IPv6 header followed by a full copy
 * of @osrh.
 *
 * @skb: packet to encapsulate; its dst must be valid
 * @osrh: SRH to copy right after the new outer IPv6 header
 * @proto: inner protocol, stored in the SRH nexthdr field
 * @cache_dst: cached dst, used only to size the required headroom (may be
 *             NULL)
 * @route_tunsrc: per-route tunnel source address, forwarded to
 *                set_tun_src() (may be NULL)
 *
 * Returns 0 on success or a negative errno (e.g. from skb_cow_head()).
 */
static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
			       int proto, struct dst_entry *cache_dst,
			       struct in6_addr *route_tunsrc)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst_dev(dst);
	struct net *net = dev_net(dev);
	struct ipv6hdr *hdr, *inner_hdr;
	struct ipv6_sr_hdr *isrh;
	int hdrlen, tot_len, err;
	__be32 flowlabel;

	/* SRH length in bytes; total push is SRH + outer IPv6 header */
	hdrlen = (osrh->hdrlen + 1) << 3;
	tot_len = hdrlen + sizeof(*hdr);

	err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
	if (unlikely(err))
		return err;

	/* compute the flowlabel before skb_push() moves the headers */
	inner_hdr = ipv6_hdr(skb);
	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);

	skb_push(skb, tot_len);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	hdr = ipv6_hdr(skb);

	/* inherit tc, flowlabel and hlim
	 * hlim will be decremented in ip6_forward() afterwards and
	 * decapsulation will overwrite inner hlim with outer hlim
	 */

	if (skb->protocol == htons(ETH_P_IPV6)) {
		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
			     flowlabel);
		hdr->hop_limit = inner_hdr->hop_limit;
	} else {
		/* non-IPv6 payload: fresh traffic class and hop limit */
		ip6_flow_hdr(hdr, 0, flowlabel);
		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));

		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));

		/* the control block has been erased, so we have to set the
		 * iif once again.
		 * We read the receiving interface index directly from the
		 * skb->skb_iif as it is done in the IPv4 receiving path (i.e.:
		 * ip_rcv_core(...)).
		 */
		IP6CB(skb)->iif = skb->skb_iif;
	}

	hdr->nexthdr = NEXTHDR_ROUTING;

	/* copy the SRH right after the outer IPv6 header */
	isrh = (void *)hdr + sizeof(*hdr);
	memcpy(isrh, osrh, hdrlen);

	isrh->nexthdr = proto;

	/* outer destination is the active segment of the SRH */
	hdr->daddr = isrh->segments[isrh->first_segment];
	set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc);

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (sr_has_hmac(isrh)) {
		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	skb_postpush_rcsum(skb, hdr, tot_len);

	return 0;
}
214
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH
 * (exported wrapper: no cached dst, no per-route tunnel source)
 */
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
	return __seg6_do_srh_encap(skb, osrh, proto, NULL, NULL);
}
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
221
/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH:
 * the first (active) segment is carried only in the outer daddr and removed
 * from the pushed SRH; with a single segment and no HMAC the SRH is skipped
 * entirely.
 *
 * Returns 0 on success or a negative errno.
 */
static int seg6_do_srh_encap_red(struct sk_buff *skb,
				 struct ipv6_sr_hdr *osrh, int proto,
				 struct dst_entry *cache_dst,
				 struct in6_addr *route_tunsrc)
{
	__u8 first_seg = osrh->first_segment;
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst_dev(dst);
	struct net *net = dev_net(dev);
	struct ipv6hdr *hdr, *inner_hdr;
	int hdrlen = ipv6_optlen(osrh);
	int red_tlv_offset, tlv_offset;
	struct ipv6_sr_hdr *isrh;
	bool skip_srh = false;
	__be32 flowlabel;
	int tot_len, err;
	int red_hdrlen;
	int tlvs_len;

	if (first_seg > 0) {
		/* reduced SRH: one in6_addr less than the original SRH */
		red_hdrlen = hdrlen - sizeof(struct in6_addr);
	} else {
		/* NOTE: if tag/flags and/or other TLVs are introduced in the
		 * seg6_iptunnel infrastructure, they should be considered when
		 * deciding to skip the SRH.
		 */
		skip_srh = !sr_has_hmac(osrh);

		red_hdrlen = skip_srh ? 0 : hdrlen;
	}

	tot_len = red_hdrlen + sizeof(struct ipv6hdr);

	err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
	if (unlikely(err))
		return err;

	/* compute the flowlabel before skb_push() moves the headers */
	inner_hdr = ipv6_hdr(skb);
	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);

	skb_push(skb, tot_len);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	hdr = ipv6_hdr(skb);

	/* based on seg6_do_srh_encap() */
	if (skb->protocol == htons(ETH_P_IPV6)) {
		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
			     flowlabel);
		hdr->hop_limit = inner_hdr->hop_limit;
	} else {
		ip6_flow_hdr(hdr, 0, flowlabel);
		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));

		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
		IP6CB(skb)->iif = skb->skb_iif;
	}

	/* no matter if we have to skip the SRH or not, the first segment
	 * always comes in the pushed IPv6 header.
	 */
	hdr->daddr = osrh->segments[first_seg];

	if (skip_srh) {
		/* fast path: no SRH at all, inner protocol follows directly */
		hdr->nexthdr = proto;

		set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc);
		goto out;
	}

	/* we cannot skip the SRH, slow path */

	hdr->nexthdr = NEXTHDR_ROUTING;
	isrh = (void *)hdr + sizeof(struct ipv6hdr);

	if (unlikely(!first_seg)) {
		/* this is a very rare case; we have only one SID but
		 * we cannot skip the SRH since we are carrying some
		 * other info.
		 */
		memcpy(isrh, osrh, hdrlen);
		goto srcaddr;
	}

	/* copy the SRH up to (but excluding) the first segment, then move
	 * any trailing TLVs one in6_addr closer to the header
	 */
	tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr);
	red_tlv_offset = tlv_offset - sizeof(struct in6_addr);

	memcpy(isrh, osrh, red_tlv_offset);

	tlvs_len = hdrlen - tlv_offset;
	if (unlikely(tlvs_len > 0)) {
		const void *s = (const void *)osrh + tlv_offset;
		void *d = (void *)isrh + red_tlv_offset;

		memcpy(d, s, tlvs_len);
	}

	/* account for the removed segment: one SID and 16 bytes (= 2 units
	 * of 8 bytes) less than the original SRH
	 */
	--isrh->first_segment;
	isrh->hdrlen -= 2;

srcaddr:
	isrh->nexthdr = proto;
	set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc);

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (unlikely(!skip_srh && sr_has_hmac(isrh))) {
		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

out:
	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	skb_postpush_rcsum(skb, hdr, tot_len);

	return 0;
}
342
/* Insert @osrh between the existing IPv6 header and its payload (inline
 * mode): the original header is moved forward, the SRH is copied behind it,
 * and the original destination becomes segments[0] of the inserted SRH.
 *
 * @cache_dst is only used to size the required headroom (may be NULL).
 *
 * Returns 0 on success or a negative errno.
 */
static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
				struct dst_entry *cache_dst)
{
	struct ipv6hdr *hdr, *oldhdr;
	struct ipv6_sr_hdr *isrh;
	int hdrlen, err;

	/* SRH length in bytes */
	hdrlen = (osrh->hdrlen + 1) << 3;

	err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
	if (unlikely(err))
		return err;

	oldhdr = ipv6_hdr(skb);

	/* temporarily pull the IPv6 header so the checksum can be updated
	 * before it is re-pushed together with the SRH
	 */
	skb_pull(skb, sizeof(struct ipv6hdr));
	skb_postpull_rcsum(skb, skb_network_header(skb),
			   sizeof(struct ipv6hdr));

	skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);

	hdr = ipv6_hdr(skb);

	/* move the original IPv6 header to the front of the enlarged area */
	memmove(hdr, oldhdr, sizeof(*hdr));

	isrh = (void *)hdr + sizeof(*hdr);
	memcpy(isrh, osrh, hdrlen);

	/* splice the SRH into the header chain */
	isrh->nexthdr = hdr->nexthdr;
	hdr->nexthdr = NEXTHDR_ROUTING;

	/* original destination becomes the final segment; the active
	 * segment becomes the new destination
	 */
	isrh->segments[0] = hdr->daddr;
	hdr->daddr = isrh->segments[isrh->first_segment];

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (sr_has_hmac(isrh)) {
		struct net *net = skb_dst_dev_net(skb);

		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);

	return 0;
}
395
/* Apply the SRv6 policy attached to the skb's dst: dispatch to the inline,
 * encap or L2 encap handler according to the configured mode, then fix up
 * the skb protocol/offload metadata.
 *
 * Returns 0 on success or a negative errno.
 */
static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
{
	struct dst_entry *dst = skb_dst(skb);
	struct seg6_iptunnel_encap *tinfo;
	struct seg6_lwt *slwt;
	int proto, err = 0;

	slwt = seg6_lwt_lwtunnel(dst->lwtstate);
	tinfo = slwt->tuninfo;

	switch (tinfo->mode) {
	case SEG6_IPTUN_MODE_INLINE:
		/* inline insertion only works on IPv6 packets */
		if (skb->protocol != htons(ETH_P_IPV6))
			return -EINVAL;

		err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst);
		if (err)
			return err;
		break;
	case SEG6_IPTUN_MODE_ENCAP:
	case SEG6_IPTUN_MODE_ENCAP_RED:
		err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
		if (err)
			return err;

		/* inner protocol recorded in the SRH nexthdr */
		if (skb->protocol == htons(ETH_P_IPV6))
			proto = IPPROTO_IPV6;
		else if (skb->protocol == htons(ETH_P_IP))
			proto = IPPROTO_IPIP;
		else
			return -EINVAL;

		if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
			err = __seg6_do_srh_encap(skb, tinfo->srh, proto,
						  cache_dst, &slwt->tunsrc);
		else
			err = seg6_do_srh_encap_red(skb, tinfo->srh, proto,
						    cache_dst, &slwt->tunsrc);

		if (err)
			return err;

		skb_set_inner_transport_header(skb, skb_transport_offset(skb));
		skb_set_inner_protocol(skb, skb->protocol);
		skb->protocol = htons(ETH_P_IPV6);
		break;
	case SEG6_IPTUN_MODE_L2ENCAP:
	case SEG6_IPTUN_MODE_L2ENCAP_RED:
		/* L2 modes encapsulate the whole Ethernet frame, so the MAC
		 * header must be known and pushed back before encap
		 */
		if (!skb_mac_header_was_set(skb))
			return -EINVAL;

		if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
			return -ENOMEM;

		skb_mac_header_rebuild(skb);
		skb_push(skb, skb->mac_len);

		if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
			err = __seg6_do_srh_encap(skb, tinfo->srh,
						  IPPROTO_ETHERNET, cache_dst,
						  &slwt->tunsrc);
		else
			err = seg6_do_srh_encap_red(skb, tinfo->srh,
						    IPPROTO_ETHERNET, cache_dst,
						    &slwt->tunsrc);

		if (err)
			return err;

		skb->protocol = htons(ETH_P_IPV6);
		break;
	}

	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
	nf_reset_ct(skb);

	return 0;
}
474
/* insert an SRH within an IPv6 packet, just after the IPv6 header
 * (exported wrapper: no cached dst)
 */
int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
{
	return __seg6_do_srh_inline(skb, osrh, NULL);
}
EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
481
/* NF_HOOK okfn for the input path: hand the packet to its (new) dst */
static int seg6_input_finish(struct net *net, struct sock *sk,
			     struct sk_buff *skb)
{
	return dst_input(skb);
}
487
/* Input path: apply the SRv6 policy, then re-route the modified packet.
 * Uses the per-route input dst cache to avoid a route lookup when possible.
 *
 * Consumes the skb on error and returns a negative errno; otherwise the
 * return value comes from NF_HOOK()/seg6_input_finish().
 */
static int seg6_input_core(struct net *net, struct sock *sk,
			   struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct dst_entry *dst = NULL;
	struct lwtunnel_state *lwtst;
	struct seg6_lwt *slwt;
	int err;

	/* We cannot dereference "orig_dst" once ip6_route_input() or
	 * skb_dst_drop() is called. However, in order to detect a dst loop, we
	 * need the address of its lwtstate. So, save the address of lwtstate
	 * now and use it later as a comparison.
	 */
	lwtst = orig_dst->lwtstate;

	slwt = seg6_lwt_lwtunnel(lwtst);

	/* dst_cache_get() takes a reference on the returned dst */
	local_bh_disable();
	dst = dst_cache_get(&slwt->cache_input);
	local_bh_enable();

	err = seg6_do_srh(skb, dst);
	if (unlikely(err)) {
		dst_release(dst);
		goto drop;
	}

	if (!dst) {
		ip6_route_input(skb);

		/* ip6_route_input() sets a NOREF dst; force a refcount on it
		 * before caching or further use.
		 */
		skb_dst_force(skb);
		dst = skb_dst(skb);
		if (unlikely(!dst)) {
			err = -ENETUNREACH;
			goto drop;
		}

		/* cache only if we don't create a dst reference loop */
		if (!dst->error && lwtst != dst->lwtstate) {
			local_bh_disable();
			dst_cache_set_ip6(&slwt->cache_input, dst,
					  &ipv6_hdr(skb)->saddr);
			local_bh_enable();
		}

		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
		if (unlikely(err))
			goto drop;
	} else {
		/* cached dst: replace the skb's dst with it */
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);
	}

	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       dev_net(skb->dev), NULL, skb, NULL,
			       skb_dst_dev(skb), seg6_input_finish);

	return seg6_input_finish(dev_net(skb->dev), NULL, skb);
drop:
	kfree_skb(skb);
	return err;
}
555
seg6_input_nf(struct sk_buff * skb)556 static int seg6_input_nf(struct sk_buff *skb)
557 {
558 struct net_device *dev = skb_dst_dev(skb);
559 struct net *net = dev_net(skb->dev);
560
561 switch (skb->protocol) {
562 case htons(ETH_P_IP):
563 return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
564 skb, NULL, dev, seg6_input_core);
565 case htons(ETH_P_IPV6):
566 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
567 skb, NULL, dev, seg6_input_core);
568 }
569
570 return -EINVAL;
571 }
572
seg6_input(struct sk_buff * skb)573 static int seg6_input(struct sk_buff *skb)
574 {
575 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
576 return seg6_input_nf(skb);
577
578 return seg6_input_core(dev_net(skb->dev), NULL, skb);
579 }
580
/* Output path: apply the SRv6 policy, then resolve (or reuse from the
 * per-route output dst cache) a route for the new outer header and transmit.
 *
 * Consumes the skb on error and returns a negative errno; otherwise the
 * return value comes from NF_HOOK()/dst_output().
 */
static int seg6_output_core(struct net *net, struct sock *sk,
			    struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct dst_entry *dst = NULL;
	struct seg6_lwt *slwt;
	int err;

	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);

	/* dst_cache_get() takes a reference on the returned dst */
	local_bh_disable();
	dst = dst_cache_get(&slwt->cache_output);
	local_bh_enable();

	err = seg6_do_srh(skb, dst);
	if (unlikely(err))
		goto drop;

	if (unlikely(!dst)) {
		/* no cached route: look one up from the new outer header */
		struct ipv6hdr *hdr = ipv6_hdr(skb);
		struct flowi6 fl6;

		memset(&fl6, 0, sizeof(fl6));
		fl6.daddr = hdr->daddr;
		fl6.saddr = hdr->saddr;
		fl6.flowlabel = ip6_flowinfo(hdr);
		fl6.flowi6_mark = skb->mark;
		fl6.flowi6_proto = hdr->nexthdr;

		dst = ip6_route_output(net, NULL, &fl6);
		if (dst->error) {
			err = dst->error;
			goto drop;
		}

		/* cache only if we don't create a dst reference loop */
		if (orig_dst->lwtstate != dst->lwtstate) {
			local_bh_disable();
			dst_cache_set_ip6(&slwt->cache_output, dst, &fl6.saddr);
			local_bh_enable();
		}

		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
		if (unlikely(err))
			goto drop;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
			       NULL, dst_dev(dst), dst_output);

	return dst_output(net, sk, skb);
drop:
	/* dst may hold a reference from the cache lookup above */
	dst_release(dst);
	kfree_skb(skb);
	return err;
}
641
seg6_output_nf(struct net * net,struct sock * sk,struct sk_buff * skb)642 static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
643 {
644 struct net_device *dev = skb_dst_dev(skb);
645
646 switch (skb->protocol) {
647 case htons(ETH_P_IP):
648 return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
649 NULL, dev, seg6_output_core);
650 case htons(ETH_P_IPV6):
651 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
652 NULL, dev, seg6_output_core);
653 }
654
655 return -EINVAL;
656 }
657
seg6_output(struct net * net,struct sock * sk,struct sk_buff * skb)658 static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
659 {
660 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
661 return seg6_output_nf(net, sk, skb);
662
663 return seg6_output_core(net, sk, skb);
664 }
665
seg6_build_state(struct net * net,struct nlattr * nla,unsigned int family,const void * cfg,struct lwtunnel_state ** ts,struct netlink_ext_ack * extack)666 static int seg6_build_state(struct net *net, struct nlattr *nla,
667 unsigned int family, const void *cfg,
668 struct lwtunnel_state **ts,
669 struct netlink_ext_ack *extack)
670 {
671 struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
672 struct seg6_iptunnel_encap *tuninfo;
673 struct lwtunnel_state *newts;
674 int tuninfo_len, min_size;
675 struct seg6_lwt *slwt;
676 int err;
677
678 if (family != AF_INET && family != AF_INET6)
679 return -EINVAL;
680
681 err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla,
682 seg6_iptunnel_policy, extack);
683
684 if (err < 0)
685 return err;
686
687 if (!tb[SEG6_IPTUNNEL_SRH])
688 return -EINVAL;
689
690 tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
691 tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
692
693 /* tuninfo must contain at least the iptunnel encap structure,
694 * the SRH and one segment
695 */
696 min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
697 sizeof(struct in6_addr);
698 if (tuninfo_len < min_size)
699 return -EINVAL;
700
701 switch (tuninfo->mode) {
702 case SEG6_IPTUN_MODE_INLINE:
703 if (family != AF_INET6)
704 return -EINVAL;
705
706 if (tb[SEG6_IPTUNNEL_SRC]) {
707 NL_SET_ERR_MSG(extack, "incompatible mode for tunsrc");
708 return -EINVAL;
709 }
710 break;
711 case SEG6_IPTUN_MODE_ENCAP:
712 break;
713 case SEG6_IPTUN_MODE_L2ENCAP:
714 break;
715 case SEG6_IPTUN_MODE_ENCAP_RED:
716 break;
717 case SEG6_IPTUN_MODE_L2ENCAP_RED:
718 break;
719 default:
720 return -EINVAL;
721 }
722
723 /* verify that SRH is consistent */
724 if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false))
725 return -EINVAL;
726
727 newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
728 if (!newts)
729 return -ENOMEM;
730
731 slwt = seg6_lwt_lwtunnel(newts);
732
733 err = dst_cache_init(&slwt->cache_input, GFP_ATOMIC);
734 if (err)
735 goto err_free_newts;
736
737 err = dst_cache_init(&slwt->cache_output, GFP_ATOMIC);
738 if (err)
739 goto err_destroy_input;
740
741 memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
742
743 if (tb[SEG6_IPTUNNEL_SRC]) {
744 slwt->tunsrc = nla_get_in6_addr(tb[SEG6_IPTUNNEL_SRC]);
745
746 if (ipv6_addr_any(&slwt->tunsrc) ||
747 ipv6_addr_is_multicast(&slwt->tunsrc) ||
748 ipv6_addr_loopback(&slwt->tunsrc)) {
749 NL_SET_ERR_MSG(extack, "invalid tunsrc address");
750 err = -EINVAL;
751 goto err_destroy_output;
752 }
753 }
754
755 newts->type = LWTUNNEL_ENCAP_SEG6;
756 newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
757
758 if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP &&
759 tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP_RED)
760 newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
761
762 newts->headroom = seg6_lwt_headroom(tuninfo);
763
764 *ts = newts;
765
766 return 0;
767
768 err_destroy_output:
769 dst_cache_destroy(&slwt->cache_output);
770 err_destroy_input:
771 dst_cache_destroy(&slwt->cache_input);
772 err_free_newts:
773 kfree(newts);
774 return err;
775 }
776
seg6_destroy_state(struct lwtunnel_state * lwt)777 static void seg6_destroy_state(struct lwtunnel_state *lwt)
778 {
779 struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwt);
780
781 dst_cache_destroy(&slwt->cache_input);
782 dst_cache_destroy(&slwt->cache_output);
783 }
784
seg6_fill_encap_info(struct sk_buff * skb,struct lwtunnel_state * lwtstate)785 static int seg6_fill_encap_info(struct sk_buff *skb,
786 struct lwtunnel_state *lwtstate)
787 {
788 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
789 struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate);
790
791 if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
792 return -EMSGSIZE;
793
794 if (!ipv6_addr_any(&slwt->tunsrc) &&
795 nla_put_in6_addr(skb, SEG6_IPTUNNEL_SRC, &slwt->tunsrc))
796 return -EMSGSIZE;
797
798 return 0;
799 }
800
seg6_encap_nlsize(struct lwtunnel_state * lwtstate)801 static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
802 {
803 struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
804 struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate);
805 int nlsize;
806
807 nlsize = nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
808
809 if (!ipv6_addr_any(&slwt->tunsrc))
810 nlsize += nla_total_size(sizeof(slwt->tunsrc));
811
812 return nlsize;
813 }
814
seg6_encap_cmp(struct lwtunnel_state * a,struct lwtunnel_state * b)815 static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
816 {
817 struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
818 struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
819 struct seg6_lwt *a_slwt = seg6_lwt_lwtunnel(a);
820 struct seg6_lwt *b_slwt = seg6_lwt_lwtunnel(b);
821 int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);
822
823 if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
824 return 1;
825
826 if (!ipv6_addr_equal(&a_slwt->tunsrc, &b_slwt->tunsrc))
827 return 1;
828
829 return memcmp(a_hdr, b_hdr, len);
830 }
831
/* lwtunnel operations registered for LWTUNNEL_ENCAP_SEG6 */
static const struct lwtunnel_encap_ops seg6_iptun_ops = {
	.build_state = seg6_build_state,
	.destroy_state = seg6_destroy_state,
	.output = seg6_output,
	.input = seg6_input,
	.fill_encap = seg6_fill_encap_info,
	.get_encap_size = seg6_encap_nlsize,
	.cmp_encap = seg6_encap_cmp,
	.owner = THIS_MODULE,
};
842
/* Register the seg6 encap operations with the lwtunnel core */
int __init seg6_iptunnel_init(void)
{
	return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
}
847
/* Unregister the seg6 encap operations from the lwtunnel core */
void seg6_iptunnel_exit(void)
{
	lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
}
852