// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * lwtunnel	Infrastructure for light weight tunnels like mpls
 *
 * Authors:	Roopa Prabhu, <roopa@cumulusnetworks.com>
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/lwtunnel.h>
#include <linux/in.h>
#include <linux/init.h>
#include <linux/err.h>

#include <net/lwtunnel.h>
#include <net/rtnetlink.h>
#include <net/ip6_fib.h>
#include <net/rtnh.h>

#include "dev.h"

/* Static key toggling netfilter hook traversal for lwtunnel packets;
 * flipped elsewhere (sysctl), only defined and exported here.
 */
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);

#ifdef CONFIG_MODULES

/* Map an encap type to the suffix used for module autoloading via
 * request_module("rtnl-lwt-%s", ...).  Returns NULL for types that
 * must not trigger autoload (either unsupported, or handled through
 * an interface-based encap).
 */
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
{
	/* Only lwt encaps implemented without using an interface for
	 * the encap need to return a string here.
	 */
	switch (encap_type) {
	case LWTUNNEL_ENCAP_MPLS:
		return "MPLS";
	case LWTUNNEL_ENCAP_ILA:
		return "ILA";
	case LWTUNNEL_ENCAP_SEG6:
		return "SEG6";
	case LWTUNNEL_ENCAP_BPF:
		return "BPF";
	case LWTUNNEL_ENCAP_SEG6_LOCAL:
		return "SEG6LOCAL";
	case LWTUNNEL_ENCAP_RPL:
		return "RPL";
	case LWTUNNEL_ENCAP_IOAM6:
		return "IOAM6";
	case LWTUNNEL_ENCAP_XFRM:
		/* module autoload not supported for encap type */
		return NULL;
	case LWTUNNEL_ENCAP_IP6:
	case LWTUNNEL_ENCAP_IP:
	case LWTUNNEL_ENCAP_NONE:
	case __LWTUNNEL_ENCAP_MAX:
		/* should not have got here */
		WARN_ON(1);
		break;
	}
	return NULL;
}

#endif /* CONFIG_MODULES */

/* Allocate a zeroed lwtunnel_state with room for encap_len bytes of
 * encap-specific data after the struct.  GFP_ATOMIC: callers may be
 * in atomic context.  May return NULL; callers must check.  Freed via
 * lwtstate_free() once the state has been handed to an encap module.
 */
struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
{
	struct lwtunnel_state *lws;

	lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);

	return lws;
}
EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);

/* Registry of encap ops, indexed by LWTUNNEL_ENCAP_* type.
 * Readers use RCU; writers use cmpxchg (see add/del below).
 */
static const struct lwtunnel_encap_ops __rcu *
		lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;

/* Register @ops for encap type @num.  The cmpxchg only succeeds if the
 * slot is currently empty, so a second registration for the same type
 * fails.  Returns 0 on success, -1 if the slot was taken, -ERANGE if
 * @num is out of range.
 */
int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
			   unsigned int num)
{
	if (num > LWTUNNEL_ENCAP_MAX)
		return -ERANGE;

	return !cmpxchg((const struct lwtunnel_encap_ops **)
			&lwtun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);

/* Unregister @ops for @encap_type.  Only clears the slot if it still
 * holds @ops (so a module cannot remove someone else's registration).
 * synchronize_net() waits out RCU readers that may still hold the old
 * pointer before the caller is allowed to free/unload it.
 */
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
			   unsigned int encap_type)
{
	int ret;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX)
		return -ERANGE;

	ret = (cmpxchg((const struct lwtunnel_encap_ops **)
		       &lwtun_encaps[encap_type],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);

/* Build an encap state (*lws) from the netlink RTA_ENCAP payload.
 * The ops lookup and try_module_get() happen under RCU; the actual
 * ->build_state() call runs outside the RCU section with the module
 * reference pinning ops.  On ->build_state() failure the module ref is
 * dropped here; on success it is dropped later by lwtstate_free().
 */
int lwtunnel_build_state(struct net *net, u16 encap_type,
			 struct nlattr *encap, unsigned int family,
			 const void *cfg, struct lwtunnel_state **lws,
			 struct netlink_ext_ack *extack)
{
	const struct lwtunnel_encap_ops *ops;
	bool found = false;
	int ret = -EINVAL;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "Unknown LWT encapsulation type");
		return ret;
	}

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[encap_type]);
	if (likely(ops && ops->build_state && try_module_get(ops->owner)))
		found = true;
	rcu_read_unlock();

	if (found) {
		ret = ops->build_state(net, encap, family, cfg, lws, extack);
		if (ret)
			module_put(ops->owner);
	} else {
		/* don't rely on -EOPNOTSUPP to detect match as build_state
		 * handlers could return it
		 */
		NL_SET_ERR_MSG_ATTR(extack, encap,
				    "LWT encapsulation type not supported");
	}

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_build_state);

/* Validate that @encap_type names a usable encap.  If no ops are
 * registered, try to autoload the backing module; @rtnl_is_held tells
 * us whether we must drop the rtnl lock around request_module() (which
 * may sleep and the loaded module needs rtnl to register).  The slot
 * is re-read after the module load.  Returns 0, -EINVAL for unknown
 * types, or -EOPNOTSUPP if still unregistered.
 */
int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack,
			      bool rtnl_is_held)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (encap_type == LWTUNNEL_ENCAP_NONE ||
	    encap_type > LWTUNNEL_ENCAP_MAX) {
		NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
		return ret;
	}

	/* rcu_access_pointer: we only test for NULL, never dereference */
	ops = rcu_access_pointer(lwtun_encaps[encap_type]);
#ifdef CONFIG_MODULES
	if (!ops) {
		const char *encap_type_str = lwtunnel_encap_str(encap_type);

		if (encap_type_str) {
			if (rtnl_is_held)
				__rtnl_unlock();
			request_module("rtnl-lwt-%s", encap_type_str);
			if (rtnl_is_held)
				rtnl_lock();

			ops = rcu_access_pointer(lwtun_encaps[encap_type]);
		}
	}
#endif
	ret = ops ? 0 : -EOPNOTSUPP;
	if (ret < 0)
		NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);

/* Walk a list of rtnexthop entries (multipath route dump) and validate
 * any RTA_ENCAP_TYPE attribute found in each hop via
 * lwtunnel_valid_encap_type().  Returns 0, -EINVAL on a malformed
 * attribute, or -EOPNOTSUPP if any hop's encap type is unsupported.
 */
int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
				   struct netlink_ext_ack *extack,
				   bool rtnl_is_held)
{
	struct rtnexthop *rtnh = (struct rtnexthop *)attr;
	struct nlattr *nla_entype;
	struct nlattr *attrs;
	u16 encap_type;
	int attrlen;

	while (rtnh_ok(rtnh, remaining)) {
		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			attrs = rtnh_attrs(rtnh);
			nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);

			if (nla_entype) {
				/* reject truncated payloads before nla_get_u16 */
				if (nla_len(nla_entype) < sizeof(u16)) {
					NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE");
					return -EINVAL;
				}
				encap_type = nla_get_u16(nla_entype);

				if (lwtunnel_valid_encap_type(encap_type,
							      extack,
							      rtnl_is_held) != 0)
					return -EOPNOTSUPP;
			}
		}
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);

/* Release a state built by lwtunnel_build_state().  Plain (non-RCU)
 * array read is safe here: the module reference taken at build time
 * guarantees ops are still registered.  States with a destroy hook are
 * freed via kfree_rcu so concurrent RCU readers of the state stay
 * valid; the module ref from build time is dropped last.
 */
void lwtstate_free(struct lwtunnel_state *lws)
{
	const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];

	if (ops->destroy_state) {
		ops->destroy_state(lws);
		kfree_rcu(lws, rcu);
	} else {
		kfree(lws);
	}
	module_put(ops->owner);
}
EXPORT_SYMBOL_GPL(lwtstate_free);

/* Dump @lwtstate into @skb as a nested @encap_attr plus a separate
 * @encap_type_attr (u16).  A NULL or typeless state dumps nothing and
 * returns 0.  -EOPNOTSUPP from the lookup (no ->fill_encap) is mapped
 * to 0: absence of a dump handler is not an error for the caller.
 */
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
			int encap_attr, int encap_type_attr)
{
	const struct lwtunnel_encap_ops *ops;
	struct nlattr *nest;
	int ret;

	if (!lwtstate)
		return 0;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	nest = nla_nest_start_noflag(skb, encap_attr);
	if (!nest)
		return -EMSGSIZE;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->fill_encap))
		ret = ops->fill_encap(skb, lwtstate);
	rcu_read_unlock();

	if (ret)
		goto nla_put_failure;
	nla_nest_end(skb, nest);
	ret = nla_put_u16(skb, encap_type_attr, lwtstate->type);
	if (ret)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	/* cancel trims the skb back to before the nest, also undoing
	 * an already-ended nest when nla_put_u16() failed
	 */
	nla_nest_cancel(skb, nest);

	return (ret == -EOPNOTSUPP ? 0 : ret);
}
EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);

/* Netlink size estimate for the encap dump of @lwtstate; 0 when there
 * is nothing to dump (NULL state, no type, or no ->get_encap_size).
 */
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = 0;

	if (!lwtstate)
		return 0;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->get_encap_size))
		ret = nla_total_size(ops->get_encap_size(lwtstate));
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);

/* Compare two states: returns 0 if equal (both NULL, or same type and
 * the encap's ->cmp_encap agrees / is absent), nonzero otherwise.
 */
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	const struct lwtunnel_encap_ops *ops;
	int ret = 0;

	if (!a && !b)
		return 0;

	if (!a || !b)
		return 1;

	if (a->type != b->type)
		return 1;

	if (a->type == LWTUNNEL_ENCAP_NONE ||
	    a->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[a->type]);
	if (likely(ops && ops->cmp_encap))
		ret = ops->cmp_encap(a, b);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);

/* dst_output() hook for lwtunnel routes.  BHs are disabled for the
 * whole call so the per-CPU dev_xmit_recursion counter is stable; the
 * counter caps nested encap invocations (e.g. tunnel-in-tunnel loops).
 * -EOPNOTSUPP from the lookup means no handler: drop the skb.
 */
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	local_bh_disable();

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}
	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX) {
		ret = 0;
		goto out;
	}

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->output)) {
		dev_xmit_recursion_inc();
		ret = ops->output(net, sk, skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	goto out;

drop:
	kfree_skb(skb);

out:
	local_bh_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_output);

/* xmit-stage hook, same structure and recursion guard as
 * lwtunnel_output() but dispatching to ops->xmit (no net/sk context).
 */
int lwtunnel_xmit(struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	local_bh_disable();

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}

	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX) {
		ret = 0;
		goto out;
	}

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->xmit)) {
		dev_xmit_recursion_inc();
		ret = ops->xmit(skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	goto out;

drop:
	kfree_skb(skb);

out:
	local_bh_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_xmit);

/* Receive-path hook.  Runs in softirq context already (asserted via
 * DEBUG_NET_WARN_ON_ONCE), so no local_bh_disable() here, unlike the
 * output/xmit paths above.
 */
int lwtunnel_input(struct sk_buff *skb)
{
	const struct lwtunnel_encap_ops *ops;
	struct lwtunnel_state *lwtstate;
	struct dst_entry *dst;
	int ret;

	DEBUG_NET_WARN_ON_ONCE(!in_softirq());

	if (dev_xmit_recursion()) {
		net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
				     __func__);
		ret = -ENETDOWN;
		goto drop;
	}

	dst = skb_dst(skb);
	if (!dst) {
		ret = -EINVAL;
		goto drop;
	}
	lwtstate = dst->lwtstate;

	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
		return 0;

	ret = -EOPNOTSUPP;
	rcu_read_lock();
	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
	if (likely(ops && ops->input)) {
		dev_xmit_recursion_inc();
		ret = ops->input(skb);
		dev_xmit_recursion_dec();
	}
	rcu_read_unlock();

	if (ret == -EOPNOTSUPP)
		goto drop;

	return ret;

drop:
	kfree_skb(skb);

	return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_input);