1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * lwtunnel Infrastructure for light weight tunnels like mpls 4 * 5 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> 6 */ 7 8 #include <linux/capability.h> 9 #include <linux/module.h> 10 #include <linux/types.h> 11 #include <linux/kernel.h> 12 #include <linux/slab.h> 13 #include <linux/uaccess.h> 14 #include <linux/skbuff.h> 15 #include <linux/netdevice.h> 16 #include <linux/lwtunnel.h> 17 #include <linux/in.h> 18 #include <linux/init.h> 19 #include <linux/err.h> 20 21 #include <net/lwtunnel.h> 22 #include <net/rtnetlink.h> 23 #include <net/ip6_fib.h> 24 #include <net/rtnh.h> 25 26 DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); 27 EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); 28 29 #ifdef CONFIG_MODULES 30 31 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) 32 { 33 /* Only lwt encaps implemented without using an interface for 34 * the encap need to return a string here. 35 */ 36 switch (encap_type) { 37 case LWTUNNEL_ENCAP_MPLS: 38 return "MPLS"; 39 case LWTUNNEL_ENCAP_ILA: 40 return "ILA"; 41 case LWTUNNEL_ENCAP_SEG6: 42 return "SEG6"; 43 case LWTUNNEL_ENCAP_BPF: 44 return "BPF"; 45 case LWTUNNEL_ENCAP_SEG6_LOCAL: 46 return "SEG6LOCAL"; 47 case LWTUNNEL_ENCAP_RPL: 48 return "RPL"; 49 case LWTUNNEL_ENCAP_IOAM6: 50 return "IOAM6"; 51 case LWTUNNEL_ENCAP_XFRM: 52 /* module autoload not supported for encap type */ 53 return NULL; 54 case LWTUNNEL_ENCAP_IP6: 55 case LWTUNNEL_ENCAP_IP: 56 case LWTUNNEL_ENCAP_NONE: 57 case __LWTUNNEL_ENCAP_MAX: 58 /* should not have got here */ 59 WARN_ON(1); 60 break; 61 } 62 return NULL; 63 } 64 65 #endif /* CONFIG_MODULES */ 66 67 struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 68 { 69 struct lwtunnel_state *lws; 70 71 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); 72 73 return lws; 74 } 75 EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); 76 77 static const struct lwtunnel_encap_ops __rcu * 78 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; 79 80 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, 81 unsigned int num) 82 { 83 if (num > LWTUNNEL_ENCAP_MAX) 84 return -ERANGE; 85 86 return !cmpxchg((const struct lwtunnel_encap_ops **) 87 &lwtun_encaps[num], 88 NULL, ops) ? 0 : -1; 89 } 90 EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); 91 92 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, 93 unsigned int encap_type) 94 { 95 int ret; 96 97 if (encap_type == LWTUNNEL_ENCAP_NONE || 98 encap_type > LWTUNNEL_ENCAP_MAX) 99 return -ERANGE; 100 101 ret = (cmpxchg((const struct lwtunnel_encap_ops **) 102 &lwtun_encaps[encap_type], 103 ops, NULL) == ops) ? 0 : -1; 104 105 synchronize_net(); 106 107 return ret; 108 } 109 EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); 110 111 int lwtunnel_build_state(struct net *net, u16 encap_type, 112 struct nlattr *encap, unsigned int family, 113 const void *cfg, struct lwtunnel_state **lws, 114 struct netlink_ext_ack *extack) 115 { 116 const struct lwtunnel_encap_ops *ops; 117 bool found = false; 118 int ret = -EINVAL; 119 120 if (encap_type == LWTUNNEL_ENCAP_NONE || 121 encap_type > LWTUNNEL_ENCAP_MAX) { 122 NL_SET_ERR_MSG_ATTR(extack, encap, 123 "Unknown LWT encapsulation type"); 124 return ret; 125 } 126 127 ret = -EOPNOTSUPP; 128 rcu_read_lock(); 129 ops = rcu_dereference(lwtun_encaps[encap_type]); 130 if (likely(ops && ops->build_state && try_module_get(ops->owner))) 131 found = true; 132 rcu_read_unlock(); 133 134 if (found) { 135 ret = ops->build_state(net, encap, family, cfg, lws, extack); 136 if (ret) 137 module_put(ops->owner); 138 } else { 139 /* don't rely on -EOPNOTSUPP to detect match as build_state 140 * handlers could return it 141 */ 142 NL_SET_ERR_MSG_ATTR(extack, encap, 143 "LWT encapsulation type not supported"); 144 } 145 146 return ret; 147 } 148 EXPORT_SYMBOL_GPL(lwtunnel_build_state); 149 150 int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) 151 { 152 const struct lwtunnel_encap_ops *ops; 153 int ret = -EINVAL; 154 155 if (encap_type == LWTUNNEL_ENCAP_NONE || 156 encap_type > LWTUNNEL_ENCAP_MAX) { 157 NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); 158 return ret; 159 } 160 161 rcu_read_lock(); 162 ops = rcu_dereference(lwtun_encaps[encap_type]); 163 rcu_read_unlock(); 164 #ifdef CONFIG_MODULES 165 if (!ops) { 166 const char *encap_type_str = lwtunnel_encap_str(encap_type); 167 168 if (encap_type_str) { 169 __rtnl_unlock(); 170 request_module("rtnl-lwt-%s", encap_type_str); 171 rtnl_lock(); 172 173 rcu_read_lock(); 174 ops = rcu_dereference(lwtun_encaps[encap_type]); 175 rcu_read_unlock(); 176 } 177 } 178 #endif 179 ret = ops ? 0 : -EOPNOTSUPP; 180 if (ret < 0) 181 NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); 182 183 return ret; 184 } 185 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); 186 187 int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, 188 struct netlink_ext_ack *extack) 189 { 190 struct rtnexthop *rtnh = (struct rtnexthop *)attr; 191 struct nlattr *nla_entype; 192 struct nlattr *attrs; 193 u16 encap_type; 194 int attrlen; 195 196 while (rtnh_ok(rtnh, remaining)) { 197 attrlen = rtnh_attrlen(rtnh); 198 if (attrlen > 0) { 199 attrs = rtnh_attrs(rtnh); 200 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 201 202 if (nla_entype) { 203 if (nla_len(nla_entype) < sizeof(u16)) { 204 NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE"); 205 return -EINVAL; 206 } 207 encap_type = nla_get_u16(nla_entype); 208 209 if (lwtunnel_valid_encap_type(encap_type, 210 extack) != 0) 211 return -EOPNOTSUPP; 212 } 213 } 214 rtnh = rtnh_next(rtnh, &remaining); 215 } 216 217 return 0; 218 } 219 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); 220 221 void lwtstate_free(struct lwtunnel_state *lws) 222 { 223 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; 224 225 if (ops->destroy_state) { 226 ops->destroy_state(lws); 227 kfree_rcu(lws, rcu); 228 } else { 229 kfree(lws); 230 } 231 module_put(ops->owner); 232 } 233 EXPORT_SYMBOL_GPL(lwtstate_free); 234 235 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, 236 int encap_attr, int encap_type_attr) 237 { 238 const struct lwtunnel_encap_ops *ops; 239 struct nlattr *nest; 240 int ret; 241 242 if (!lwtstate) 243 return 0; 244 245 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 246 lwtstate->type > LWTUNNEL_ENCAP_MAX) 247 return 0; 248 249 nest = nla_nest_start_noflag(skb, encap_attr); 250 if (!nest) 251 return -EMSGSIZE; 252 253 ret = -EOPNOTSUPP; 254 rcu_read_lock(); 255 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 256 if (likely(ops && ops->fill_encap)) 257 ret = ops->fill_encap(skb, lwtstate); 258 rcu_read_unlock(); 259 260 if (ret) 261 goto nla_put_failure; 262 nla_nest_end(skb, nest); 263 ret = nla_put_u16(skb, encap_type_attr, lwtstate->type); 264 if (ret) 265 goto nla_put_failure; 266 267 return 0; 268 269 nla_put_failure: 270 nla_nest_cancel(skb, nest); 271 272 return (ret == -EOPNOTSUPP ? 0 : ret); 273 } 274 EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); 275 276 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) 277 { 278 const struct lwtunnel_encap_ops *ops; 279 int ret = 0; 280 281 if (!lwtstate) 282 return 0; 283 284 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 285 lwtstate->type > LWTUNNEL_ENCAP_MAX) 286 return 0; 287 288 rcu_read_lock(); 289 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 290 if (likely(ops && ops->get_encap_size)) 291 ret = nla_total_size(ops->get_encap_size(lwtstate)); 292 rcu_read_unlock(); 293 294 return ret; 295 } 296 EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); 297 298 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) 299 { 300 const struct lwtunnel_encap_ops *ops; 301 int ret = 0; 302 303 if (!a && !b) 304 return 0; 305 306 if (!a || !b) 307 return 1; 308 309 if (a->type != b->type) 310 return 1; 311 312 if (a->type == LWTUNNEL_ENCAP_NONE || 313 a->type > LWTUNNEL_ENCAP_MAX) 314 return 0; 315 316 rcu_read_lock(); 317 ops = rcu_dereference(lwtun_encaps[a->type]); 318 if (likely(ops && ops->cmp_encap)) 319 ret = ops->cmp_encap(a, b); 320 rcu_read_unlock(); 321 322 return ret; 323 } 324 EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); 325 326 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) 327 { 328 struct dst_entry *dst = skb_dst(skb); 329 const struct lwtunnel_encap_ops *ops; 330 struct lwtunnel_state *lwtstate; 331 int ret = -EINVAL; 332 333 if (!dst) 334 goto drop; 335 lwtstate = dst->lwtstate; 336 337 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 338 lwtstate->type > LWTUNNEL_ENCAP_MAX) 339 return 0; 340 341 ret = -EOPNOTSUPP; 342 rcu_read_lock(); 343 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 344 if (likely(ops && ops->output)) 345 ret = ops->output(net, sk, skb); 346 rcu_read_unlock(); 347 348 if (ret == -EOPNOTSUPP) 349 goto drop; 350 351 return ret; 352 353 drop: 354 kfree_skb(skb); 355 356 return ret; 357 } 358 EXPORT_SYMBOL_GPL(lwtunnel_output); 359 360 int lwtunnel_xmit(struct sk_buff *skb) 361 { 362 struct dst_entry *dst = skb_dst(skb); 363 const struct lwtunnel_encap_ops *ops; 364 struct lwtunnel_state *lwtstate; 365 int ret = -EINVAL; 366 367 if (!dst) 368 goto drop; 369 370 lwtstate = dst->lwtstate; 371 372 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 373 lwtstate->type > LWTUNNEL_ENCAP_MAX) 374 return 0; 375 376 ret = -EOPNOTSUPP; 377 rcu_read_lock(); 378 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 379 if (likely(ops && ops->xmit)) 380 ret = ops->xmit(skb); 381 rcu_read_unlock(); 382 383 if (ret == -EOPNOTSUPP) 384 goto drop; 385 386 return ret; 387 388 drop: 389 kfree_skb(skb); 390 391 return ret; 392 } 393 EXPORT_SYMBOL_GPL(lwtunnel_xmit); 394 395 int lwtunnel_input(struct sk_buff *skb) 396 { 397 struct dst_entry *dst = skb_dst(skb); 398 const struct lwtunnel_encap_ops *ops; 399 struct lwtunnel_state *lwtstate; 400 int ret = -EINVAL; 401 402 if (!dst) 403 goto drop; 404 lwtstate = dst->lwtstate; 405 406 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 407 lwtstate->type > LWTUNNEL_ENCAP_MAX) 408 return 0; 409 410 ret = -EOPNOTSUPP; 411 rcu_read_lock(); 412 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 413 if (likely(ops && ops->input)) 414 ret = ops->input(skb); 415 rcu_read_unlock(); 416 417 if (ret == -EOPNOTSUPP) 418 goto drop; 419 420 return ret; 421 422 drop: 423 kfree_skb(skb); 424 425 return ret; 426 } 427 EXPORT_SYMBOL_GPL(lwtunnel_input); 428