1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * lwtunnel Infrastructure for light weight tunnels like mpls
4 *
5 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
6 */
7
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/lwtunnel.h>
17 #include <linux/in.h>
18 #include <linux/init.h>
19 #include <linux/err.h>
20
21 #include <net/lwtunnel.h>
22 #include <net/rtnetlink.h>
23 #include <net/ip6_fib.h>
24 #include <net/rtnh.h>
25
26 #include "dev.h"
27
/* Static key defined with an initial value of false and exported (GPL-only)
 * so that code outside this file can reference it.
 * NOTE(review): the name suggests it gates netfilter hooks for lwtunnel
 * traffic; nothing in this file reads or flips it - confirm against users.
 */
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
30
#ifdef CONFIG_MODULES

/* Translate an encap type into the name used to build its module alias.
 *
 * Only lwt encaps that are implemented without an interface for the encap
 * need a name here. Returns NULL when no name exists for @encap_type; the
 * IP, IP6, NONE and __LWTUNNEL_ENCAP_MAX values additionally trigger a
 * WARN_ON() because they are not expected to reach this helper.
 */
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
{
	const char *name = NULL;

	/* No default label on purpose: every enumerator is listed explicitly. */
	switch (encap_type) {
	case LWTUNNEL_ENCAP_MPLS:
		name = "MPLS";
		break;
	case LWTUNNEL_ENCAP_ILA:
		name = "ILA";
		break;
	case LWTUNNEL_ENCAP_SEG6:
		name = "SEG6";
		break;
	case LWTUNNEL_ENCAP_BPF:
		name = "BPF";
		break;
	case LWTUNNEL_ENCAP_SEG6_LOCAL:
		name = "SEG6LOCAL";
		break;
	case LWTUNNEL_ENCAP_RPL:
		name = "RPL";
		break;
	case LWTUNNEL_ENCAP_IOAM6:
		name = "IOAM6";
		break;
	case LWTUNNEL_ENCAP_XFRM:
		/* module autoload not supported for encap type */
		break;
	case LWTUNNEL_ENCAP_IP6:
	case LWTUNNEL_ENCAP_IP:
	case LWTUNNEL_ENCAP_NONE:
	case __LWTUNNEL_ENCAP_MAX:
		/* should not have got here */
		WARN_ON(1);
		break;
	}

	return name;
}

#endif /* CONFIG_MODULES */
68
lwtunnel_state_alloc(int encap_len)69 struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
70 {
71 struct lwtunnel_state *lws;
72
73 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);
74
75 return lws;
76 }
77 EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);
78
/* Table of registered encap ops, indexed by encap type. Slots are filled
 * and cleared with cmpxchg() by the add/del helpers in this file; most
 * lookups in this file go through rcu_dereference()/rcu_access_pointer().
 */
static const struct lwtunnel_encap_ops __rcu *
lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
81
lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops * ops,unsigned int num)82 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
83 unsigned int num)
84 {
85 if (num > LWTUNNEL_ENCAP_MAX)
86 return -ERANGE;
87
88 return !cmpxchg((const struct lwtunnel_encap_ops **)
89 &lwtun_encaps[num],
90 NULL, ops) ? 0 : -1;
91 }
92 EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);
93
lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops * ops,unsigned int encap_type)94 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
95 unsigned int encap_type)
96 {
97 int ret;
98
99 if (encap_type == LWTUNNEL_ENCAP_NONE ||
100 encap_type > LWTUNNEL_ENCAP_MAX)
101 return -ERANGE;
102
103 ret = (cmpxchg((const struct lwtunnel_encap_ops **)
104 &lwtun_encaps[encap_type],
105 ops, NULL) == ops) ? 0 : -1;
106
107 synchronize_net();
108
109 return ret;
110 }
111 EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);
112
/* Build a tunnel state for @encap_type by calling the registered ops'
 * build_state() handler.
 *
 * A module reference on the ops owner is taken before the handler runs and
 * is dropped again if the handler fails; on success the reference is kept.
 *
 * Returns -EINVAL for LWTUNNEL_ENCAP_NONE or an out-of-range type,
 * -EOPNOTSUPP when no usable ops are registered (or the module reference
 * could not be taken), otherwise whatever build_state() returns. An extack
 * message is set for the first two cases.
 */
int lwtunnel_build_state(struct net *net, u16 encap_type,
			 struct nlattr *encap, unsigned int family,
			 const void *cfg, struct lwtunnel_state **lws,
			 struct netlink_ext_ack *extack)
{
	const struct lwtunnel_encap_ops *encap_ops;
	bool usable = false;
	int err;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "Unknown LWT encapsulation type");
		return -EINVAL;
	}

	/* Pin the owning module while still inside the RCU read section so
	 * the ops can be used after leaving it.
	 */
	rcu_read_lock();
	encap_ops = rcu_dereference(lwtun_encaps[encap_type]);
	if (likely(encap_ops && encap_ops->build_state &&
		   try_module_get(encap_ops->owner)))
		usable = true;
	rcu_read_unlock();

	if (!usable) {
		/* don't rely on -EOPNOTSUPP to detect match as build_state
		 * handlers could return it
		 */
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "LWT encapsulation type not supported");
		return -EOPNOTSUPP;
	}

	err = encap_ops->build_state(net, encap, family, cfg, lws, extack);
	if (err)
		module_put(encap_ops->owner);

	return err;
}
EXPORT_SYMBOL_GPL(lwtunnel_build_state);
151
/* Check that ops are registered for @encap_type.
 *
 * With CONFIG_MODULES, an empty slot triggers a request_module() for
 * "rtnl-lwt-<name>" when lwtunnel_encap_str() provides a name, after which
 * the slot is read again.
 *
 * Returns 0 when ops are present, -EINVAL for LWTUNNEL_ENCAP_NONE or an
 * out-of-range type, and -EOPNOTSUPP when no ops are registered. Both
 * error cases set an extack message.
 */
int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
{
	const struct lwtunnel_encap_ops *encap_ops;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
		return -EINVAL;
	}

	/* Only the pointer value is inspected, it is never dereferenced. */
	encap_ops = rcu_access_pointer(lwtun_encaps[encap_type]);
#ifdef CONFIG_MODULES
	if (!encap_ops) {
		const char *name = lwtunnel_encap_str(encap_type);

		if (name) {
			request_module("rtnl-lwt-%s", name);
			encap_ops = rcu_access_pointer(lwtun_encaps[encap_type]);
		}
	}
#endif
	if (encap_ops)
		return 0;

	NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");

	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);
181
lwtunnel_valid_encap_type_attr(struct nlattr * attr,int remaining,struct netlink_ext_ack * extack)182 int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
183 struct netlink_ext_ack *extack)
184 {
185 struct rtnexthop *rtnh = (struct rtnexthop *)attr;
186 struct nlattr *nla_entype;
187 struct nlattr *attrs;
188 u16 encap_type;
189 int attrlen;
190
191 while (rtnh_ok(rtnh, remaining)) {
192 attrlen = rtnh_attrlen(rtnh);
193 if (attrlen > 0) {
194 attrs = rtnh_attrs(rtnh);
195 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
196
197 if (nla_entype) {
198 if (nla_len(nla_entype) < sizeof(u16)) {
199 NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE");
200 return -EINVAL;
201 }
202 encap_type = nla_get_u16(nla_entype);
203
204 if (lwtunnel_valid_encap_type(encap_type, extack))
205 return -EOPNOTSUPP;
206 }
207 }
208 rtnh = rtnh_next(rtnh, &remaining);
209 }
210
211 return 0;
212 }
213 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);
214
lwtstate_free(struct lwtunnel_state * lws)215 void lwtstate_free(struct lwtunnel_state *lws)
216 {
217 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];
218
219 if (ops->destroy_state) {
220 ops->destroy_state(lws);
221 kfree_rcu(lws, rcu);
222 } else {
223 kfree(lws);
224 }
225 module_put(ops->owner);
226 }
227 EXPORT_SYMBOL_GPL(lwtstate_free);
228
lwtunnel_fill_encap(struct sk_buff * skb,struct lwtunnel_state * lwtstate,int encap_attr,int encap_type_attr)229 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
230 int encap_attr, int encap_type_attr)
231 {
232 const struct lwtunnel_encap_ops *ops;
233 struct nlattr *nest;
234 int ret;
235
236 if (!lwtstate)
237 return 0;
238
239 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
240 lwtstate->type > LWTUNNEL_ENCAP_MAX)
241 return 0;
242
243 nest = nla_nest_start_noflag(skb, encap_attr);
244 if (!nest)
245 return -EMSGSIZE;
246
247 ret = -EOPNOTSUPP;
248 rcu_read_lock();
249 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
250 if (likely(ops && ops->fill_encap))
251 ret = ops->fill_encap(skb, lwtstate);
252 rcu_read_unlock();
253
254 if (ret)
255 goto nla_put_failure;
256 nla_nest_end(skb, nest);
257 ret = nla_put_u16(skb, encap_type_attr, lwtstate->type);
258 if (ret)
259 goto nla_put_failure;
260
261 return 0;
262
263 nla_put_failure:
264 nla_nest_cancel(skb, nest);
265
266 return (ret == -EOPNOTSUPP ? 0 : ret);
267 }
268 EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);
269
lwtunnel_get_encap_size(struct lwtunnel_state * lwtstate)270 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
271 {
272 const struct lwtunnel_encap_ops *ops;
273 int ret = 0;
274
275 if (!lwtstate)
276 return 0;
277
278 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
279 lwtstate->type > LWTUNNEL_ENCAP_MAX)
280 return 0;
281
282 rcu_read_lock();
283 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
284 if (likely(ops && ops->get_encap_size))
285 ret = nla_total_size(ops->get_encap_size(lwtstate));
286 rcu_read_unlock();
287
288 return ret;
289 }
290 EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);
291
lwtunnel_cmp_encap(struct lwtunnel_state * a,struct lwtunnel_state * b)292 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
293 {
294 const struct lwtunnel_encap_ops *ops;
295 int ret = 0;
296
297 if (!a && !b)
298 return 0;
299
300 if (!a || !b)
301 return 1;
302
303 if (a->type != b->type)
304 return 1;
305
306 if (a->type == LWTUNNEL_ENCAP_NONE ||
307 a->type > LWTUNNEL_ENCAP_MAX)
308 return 0;
309
310 rcu_read_lock();
311 ops = rcu_dereference(lwtun_encaps[a->type]);
312 if (likely(ops && ops->cmp_encap))
313 ret = ops->cmp_encap(a, b);
314 rcu_read_unlock();
315
316 return ret;
317 }
318 EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);
319
lwtunnel_output(struct net * net,struct sock * sk,struct sk_buff * skb)320 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
321 {
322 const struct lwtunnel_encap_ops *ops;
323 struct lwtunnel_state *lwtstate;
324 struct dst_entry *dst;
325 int ret;
326
327 local_bh_disable();
328
329 if (dev_xmit_recursion()) {
330 net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
331 __func__);
332 ret = -ENETDOWN;
333 goto drop;
334 }
335
336 dst = skb_dst(skb);
337 if (!dst) {
338 ret = -EINVAL;
339 goto drop;
340 }
341 lwtstate = dst->lwtstate;
342
343 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
344 lwtstate->type > LWTUNNEL_ENCAP_MAX) {
345 ret = 0;
346 goto out;
347 }
348
349 ret = -EOPNOTSUPP;
350 rcu_read_lock();
351 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
352 if (likely(ops && ops->output)) {
353 dev_xmit_recursion_inc();
354 ret = ops->output(net, sk, skb);
355 dev_xmit_recursion_dec();
356 }
357 rcu_read_unlock();
358
359 if (ret == -EOPNOTSUPP)
360 goto drop;
361
362 goto out;
363
364 drop:
365 kfree_skb(skb);
366
367 out:
368 local_bh_enable();
369 return ret;
370 }
371 EXPORT_SYMBOL_GPL(lwtunnel_output);
372
lwtunnel_xmit(struct sk_buff * skb)373 int lwtunnel_xmit(struct sk_buff *skb)
374 {
375 const struct lwtunnel_encap_ops *ops;
376 struct lwtunnel_state *lwtstate;
377 struct dst_entry *dst;
378 int ret;
379
380 local_bh_disable();
381
382 if (dev_xmit_recursion()) {
383 net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
384 __func__);
385 ret = -ENETDOWN;
386 goto drop;
387 }
388
389 dst = skb_dst(skb);
390 if (!dst) {
391 ret = -EINVAL;
392 goto drop;
393 }
394
395 lwtstate = dst->lwtstate;
396
397 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
398 lwtstate->type > LWTUNNEL_ENCAP_MAX) {
399 ret = 0;
400 goto out;
401 }
402
403 ret = -EOPNOTSUPP;
404 rcu_read_lock();
405 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
406 if (likely(ops && ops->xmit)) {
407 dev_xmit_recursion_inc();
408 ret = ops->xmit(skb);
409 dev_xmit_recursion_dec();
410 }
411 rcu_read_unlock();
412
413 if (ret == -EOPNOTSUPP)
414 goto drop;
415
416 goto out;
417
418 drop:
419 kfree_skb(skb);
420
421 out:
422 local_bh_enable();
423 return ret;
424 }
425 EXPORT_SYMBOL_GPL(lwtunnel_xmit);
426
lwtunnel_input(struct sk_buff * skb)427 int lwtunnel_input(struct sk_buff *skb)
428 {
429 const struct lwtunnel_encap_ops *ops;
430 struct lwtunnel_state *lwtstate;
431 struct dst_entry *dst;
432 int ret;
433
434 DEBUG_NET_WARN_ON_ONCE(!in_softirq());
435
436 if (dev_xmit_recursion()) {
437 net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
438 __func__);
439 ret = -ENETDOWN;
440 goto drop;
441 }
442
443 dst = skb_dst(skb);
444 if (!dst) {
445 ret = -EINVAL;
446 goto drop;
447 }
448 lwtstate = dst->lwtstate;
449
450 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
451 lwtstate->type > LWTUNNEL_ENCAP_MAX)
452 return 0;
453
454 ret = -EOPNOTSUPP;
455 rcu_read_lock();
456 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
457 if (likely(ops && ops->input)) {
458 dev_xmit_recursion_inc();
459 ret = ops->input(skb);
460 dev_xmit_recursion_dec();
461 }
462 rcu_read_unlock();
463
464 if (ret == -EOPNOTSUPP)
465 goto drop;
466
467 return ret;
468
469 drop:
470 kfree_skb(skb);
471
472 return ret;
473 }
474 EXPORT_SYMBOL_GPL(lwtunnel_input);
475