1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * lwtunnel Infrastructure for light weight tunnels like mpls 4 * 5 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> 6 */ 7 8 #include <linux/capability.h> 9 #include <linux/module.h> 10 #include <linux/types.h> 11 #include <linux/kernel.h> 12 #include <linux/slab.h> 13 #include <linux/uaccess.h> 14 #include <linux/skbuff.h> 15 #include <linux/netdevice.h> 16 #include <linux/lwtunnel.h> 17 #include <linux/in.h> 18 #include <linux/init.h> 19 #include <linux/err.h> 20 21 #include <net/lwtunnel.h> 22 #include <net/rtnetlink.h> 23 #include <net/ip6_fib.h> 24 #include <net/rtnh.h> 25 26 DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); 27 EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); 28 29 #ifdef CONFIG_MODULES 30 31 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) 32 { 33 /* Only lwt encaps implemented without using an interface for 34 * the encap need to return a string here. 35 */ 36 switch (encap_type) { 37 case LWTUNNEL_ENCAP_MPLS: 38 return "MPLS"; 39 case LWTUNNEL_ENCAP_ILA: 40 return "ILA"; 41 case LWTUNNEL_ENCAP_SEG6: 42 return "SEG6"; 43 case LWTUNNEL_ENCAP_BPF: 44 return "BPF"; 45 case LWTUNNEL_ENCAP_SEG6_LOCAL: 46 return "SEG6LOCAL"; 47 case LWTUNNEL_ENCAP_RPL: 48 return "RPL"; 49 case LWTUNNEL_ENCAP_IOAM6: 50 return "IOAM6"; 51 case LWTUNNEL_ENCAP_XFRM: 52 /* module autoload not supported for encap type */ 53 return NULL; 54 case LWTUNNEL_ENCAP_IP6: 55 case LWTUNNEL_ENCAP_IP: 56 case LWTUNNEL_ENCAP_NONE: 57 case __LWTUNNEL_ENCAP_MAX: 58 /* should not have got here */ 59 WARN_ON(1); 60 break; 61 } 62 return NULL; 63 } 64 65 #endif /* CONFIG_MODULES */ 66 67 struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 68 { 69 struct lwtunnel_state *lws; 70 71 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); 72 73 return lws; 74 } 75 EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); 76 77 static const struct lwtunnel_encap_ops __rcu * 78 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; 79 80 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, 81 unsigned int num) 82 { 83 if (num > LWTUNNEL_ENCAP_MAX) 84 return -ERANGE; 85 86 return !cmpxchg((const struct lwtunnel_encap_ops **) 87 &lwtun_encaps[num], 88 NULL, ops) ? 0 : -1; 89 } 90 EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); 91 92 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, 93 unsigned int encap_type) 94 { 95 int ret; 96 97 if (encap_type == LWTUNNEL_ENCAP_NONE || 98 encap_type > LWTUNNEL_ENCAP_MAX) 99 return -ERANGE; 100 101 ret = (cmpxchg((const struct lwtunnel_encap_ops **) 102 &lwtun_encaps[encap_type], 103 ops, NULL) == ops) ? 0 : -1; 104 105 synchronize_net(); 106 107 return ret; 108 } 109 EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); 110 111 int lwtunnel_build_state(struct net *net, u16 encap_type, 112 struct nlattr *encap, unsigned int family, 113 const void *cfg, struct lwtunnel_state **lws, 114 struct netlink_ext_ack *extack) 115 { 116 const struct lwtunnel_encap_ops *ops; 117 bool found = false; 118 int ret = -EINVAL; 119 120 if (encap_type == LWTUNNEL_ENCAP_NONE || 121 encap_type > LWTUNNEL_ENCAP_MAX) { 122 NL_SET_ERR_MSG_ATTR(extack, encap, 123 "Unknown LWT encapsulation type"); 124 return ret; 125 } 126 127 ret = -EOPNOTSUPP; 128 rcu_read_lock(); 129 ops = rcu_dereference(lwtun_encaps[encap_type]); 130 if (likely(ops && ops->build_state && try_module_get(ops->owner))) 131 found = true; 132 rcu_read_unlock(); 133 134 if (found) { 135 ret = ops->build_state(net, encap, family, cfg, lws, extack); 136 if (ret) 137 module_put(ops->owner); 138 } else { 139 /* don't rely on -EOPNOTSUPP to detect match as build_state 140 * handlers could return it 141 */ 142 NL_SET_ERR_MSG_ATTR(extack, encap, 143 "LWT encapsulation type not supported"); 144 } 145 146 return ret; 147 } 148 EXPORT_SYMBOL_GPL(lwtunnel_build_state); 149 150 int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack, 151 bool rtnl_is_held) 152 { 153 const struct lwtunnel_encap_ops *ops; 154 int ret = -EINVAL; 155 156 if (encap_type == LWTUNNEL_ENCAP_NONE || 157 encap_type > LWTUNNEL_ENCAP_MAX) { 158 NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); 159 return ret; 160 } 161 162 ops = rcu_access_pointer(lwtun_encaps[encap_type]); 163 #ifdef CONFIG_MODULES 164 if (!ops) { 165 const char *encap_type_str = lwtunnel_encap_str(encap_type); 166 167 if (encap_type_str) { 168 if (rtnl_is_held) 169 __rtnl_unlock(); 170 request_module("rtnl-lwt-%s", encap_type_str); 171 if (rtnl_is_held) 172 rtnl_lock(); 173 174 ops = rcu_access_pointer(lwtun_encaps[encap_type]); 175 } 176 } 177 #endif 178 ret = ops ? 0 : -EOPNOTSUPP; 179 if (ret < 0) 180 NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); 181 182 return ret; 183 } 184 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); 185 186 int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, 187 struct netlink_ext_ack *extack, 188 bool rtnl_is_held) 189 { 190 struct rtnexthop *rtnh = (struct rtnexthop *)attr; 191 struct nlattr *nla_entype; 192 struct nlattr *attrs; 193 u16 encap_type; 194 int attrlen; 195 196 while (rtnh_ok(rtnh, remaining)) { 197 attrlen = rtnh_attrlen(rtnh); 198 if (attrlen > 0) { 199 attrs = rtnh_attrs(rtnh); 200 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 201 202 if (nla_entype) { 203 if (nla_len(nla_entype) < sizeof(u16)) { 204 NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE"); 205 return -EINVAL; 206 } 207 encap_type = nla_get_u16(nla_entype); 208 209 if (lwtunnel_valid_encap_type(encap_type, 210 extack, 211 rtnl_is_held) != 0) 212 return -EOPNOTSUPP; 213 } 214 } 215 rtnh = rtnh_next(rtnh, &remaining); 216 } 217 218 return 0; 219 } 220 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); 221 222 void lwtstate_free(struct lwtunnel_state *lws) 223 { 224 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; 225 226 if (ops->destroy_state) { 227 ops->destroy_state(lws); 228 kfree_rcu(lws, rcu); 229 } else { 230 kfree(lws); 231 } 232 module_put(ops->owner); 233 } 234 EXPORT_SYMBOL_GPL(lwtstate_free); 235 236 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate, 237 int encap_attr, int encap_type_attr) 238 { 239 const struct lwtunnel_encap_ops *ops; 240 struct nlattr *nest; 241 int ret; 242 243 if (!lwtstate) 244 return 0; 245 246 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 247 lwtstate->type > LWTUNNEL_ENCAP_MAX) 248 return 0; 249 250 nest = nla_nest_start_noflag(skb, encap_attr); 251 if (!nest) 252 return -EMSGSIZE; 253 254 ret = -EOPNOTSUPP; 255 rcu_read_lock(); 256 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 257 if (likely(ops && ops->fill_encap)) 258 ret = ops->fill_encap(skb, lwtstate); 259 rcu_read_unlock(); 260 261 if (ret) 262 goto nla_put_failure; 263 nla_nest_end(skb, nest); 264 ret = nla_put_u16(skb, encap_type_attr, lwtstate->type); 265 if (ret) 266 goto nla_put_failure; 267 268 return 0; 269 270 nla_put_failure: 271 nla_nest_cancel(skb, nest); 272 273 return (ret == -EOPNOTSUPP ? 0 : ret); 274 } 275 EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); 276 277 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) 278 { 279 const struct lwtunnel_encap_ops *ops; 280 int ret = 0; 281 282 if (!lwtstate) 283 return 0; 284 285 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 286 lwtstate->type > LWTUNNEL_ENCAP_MAX) 287 return 0; 288 289 rcu_read_lock(); 290 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 291 if (likely(ops && ops->get_encap_size)) 292 ret = nla_total_size(ops->get_encap_size(lwtstate)); 293 rcu_read_unlock(); 294 295 return ret; 296 } 297 EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); 298 299 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) 300 { 301 const struct lwtunnel_encap_ops *ops; 302 int ret = 0; 303 304 if (!a && !b) 305 return 0; 306 307 if (!a || !b) 308 return 1; 309 310 if (a->type != b->type) 311 return 1; 312 313 if (a->type == LWTUNNEL_ENCAP_NONE || 314 a->type > LWTUNNEL_ENCAP_MAX) 315 return 0; 316 317 rcu_read_lock(); 318 ops = rcu_dereference(lwtun_encaps[a->type]); 319 if (likely(ops && ops->cmp_encap)) 320 ret = ops->cmp_encap(a, b); 321 rcu_read_unlock(); 322 323 return ret; 324 } 325 EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); 326 327 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) 328 { 329 struct dst_entry *dst = skb_dst(skb); 330 const struct lwtunnel_encap_ops *ops; 331 struct lwtunnel_state *lwtstate; 332 int ret = -EINVAL; 333 334 if (!dst) 335 goto drop; 336 lwtstate = dst->lwtstate; 337 338 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 339 lwtstate->type > LWTUNNEL_ENCAP_MAX) 340 return 0; 341 342 ret = -EOPNOTSUPP; 343 rcu_read_lock(); 344 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 345 if (likely(ops && ops->output)) 346 ret = ops->output(net, sk, skb); 347 rcu_read_unlock(); 348 349 if (ret == -EOPNOTSUPP) 350 goto drop; 351 352 return ret; 353 354 drop: 355 kfree_skb(skb); 356 357 return ret; 358 } 359 EXPORT_SYMBOL_GPL(lwtunnel_output); 360 361 int lwtunnel_xmit(struct sk_buff *skb) 362 { 363 struct dst_entry *dst = skb_dst(skb); 364 const struct lwtunnel_encap_ops *ops; 365 struct lwtunnel_state *lwtstate; 366 int ret = -EINVAL; 367 368 if (!dst) 369 goto drop; 370 371 lwtstate = dst->lwtstate; 372 373 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 374 lwtstate->type > LWTUNNEL_ENCAP_MAX) 375 return 0; 376 377 ret = -EOPNOTSUPP; 378 rcu_read_lock(); 379 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 380 if (likely(ops && ops->xmit)) 381 ret = ops->xmit(skb); 382 rcu_read_unlock(); 383 384 if (ret == -EOPNOTSUPP) 385 goto drop; 386 387 return ret; 388 389 drop: 390 kfree_skb(skb); 391 392 return ret; 393 } 394 EXPORT_SYMBOL_GPL(lwtunnel_xmit); 395 396 int lwtunnel_input(struct sk_buff *skb) 397 { 398 struct dst_entry *dst = skb_dst(skb); 399 const struct lwtunnel_encap_ops *ops; 400 struct lwtunnel_state *lwtstate; 401 int ret = -EINVAL; 402 403 if (!dst) 404 goto drop; 405 lwtstate = dst->lwtstate; 406 407 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 408 lwtstate->type > LWTUNNEL_ENCAP_MAX) 409 return 0; 410 411 ret = -EOPNOTSUPP; 412 rcu_read_lock(); 413 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 414 if (likely(ops && ops->input)) 415 ret = ops->input(skb); 416 rcu_read_unlock(); 417 418 if (ret == -EOPNOTSUPP) 419 goto drop; 420 421 return ret; 422 423 drop: 424 kfree_skb(skb); 425 426 return ret; 427 } 428 EXPORT_SYMBOL_GPL(lwtunnel_input); 429