1 /* 2 * lwtunnel Infrastructure for light weight tunnels like mpls 3 * 4 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 */ 12 13 #include <linux/capability.h> 14 #include <linux/module.h> 15 #include <linux/types.h> 16 #include <linux/kernel.h> 17 #include <linux/slab.h> 18 #include <linux/uaccess.h> 19 #include <linux/skbuff.h> 20 #include <linux/netdevice.h> 21 #include <linux/lwtunnel.h> 22 #include <linux/in.h> 23 #include <linux/init.h> 24 #include <linux/err.h> 25 26 #include <net/lwtunnel.h> 27 #include <net/rtnetlink.h> 28 #include <net/ip6_fib.h> 29 #include <net/nexthop.h> 30 31 #ifdef CONFIG_MODULES 32 33 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) 34 { 35 /* Only lwt encaps implemented without using an interface for 36 * the encap need to return a string here. 37 */ 38 switch (encap_type) { 39 case LWTUNNEL_ENCAP_MPLS: 40 return "MPLS"; 41 case LWTUNNEL_ENCAP_ILA: 42 return "ILA"; 43 case LWTUNNEL_ENCAP_SEG6: 44 return "SEG6"; 45 case LWTUNNEL_ENCAP_BPF: 46 return "BPF"; 47 case LWTUNNEL_ENCAP_SEG6_LOCAL: 48 return "SEG6LOCAL"; 49 case LWTUNNEL_ENCAP_IP6: 50 case LWTUNNEL_ENCAP_IP: 51 case LWTUNNEL_ENCAP_NONE: 52 case __LWTUNNEL_ENCAP_MAX: 53 /* should not have got here */ 54 WARN_ON(1); 55 break; 56 } 57 return NULL; 58 } 59 60 #endif /* CONFIG_MODULES */ 61 62 struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 63 { 64 struct lwtunnel_state *lws; 65 66 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); 67 68 return lws; 69 } 70 EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); 71 72 static const struct lwtunnel_encap_ops __rcu * 73 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; 74 75 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, 76 unsigned int num) 77 { 78 if (num > LWTUNNEL_ENCAP_MAX) 79 return -ERANGE; 80 81 return !cmpxchg((const struct lwtunnel_encap_ops **) 82 &lwtun_encaps[num], 83 NULL, ops) ? 0 : -1; 84 } 85 EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); 86 87 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, 88 unsigned int encap_type) 89 { 90 int ret; 91 92 if (encap_type == LWTUNNEL_ENCAP_NONE || 93 encap_type > LWTUNNEL_ENCAP_MAX) 94 return -ERANGE; 95 96 ret = (cmpxchg((const struct lwtunnel_encap_ops **) 97 &lwtun_encaps[encap_type], 98 ops, NULL) == ops) ? 0 : -1; 99 100 synchronize_net(); 101 102 return ret; 103 } 104 EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); 105 106 int lwtunnel_build_state(u16 encap_type, 107 struct nlattr *encap, unsigned int family, 108 const void *cfg, struct lwtunnel_state **lws, 109 struct netlink_ext_ack *extack) 110 { 111 const struct lwtunnel_encap_ops *ops; 112 bool found = false; 113 int ret = -EINVAL; 114 115 if (encap_type == LWTUNNEL_ENCAP_NONE || 116 encap_type > LWTUNNEL_ENCAP_MAX) { 117 NL_SET_ERR_MSG_ATTR(extack, encap, 118 "Unknown LWT encapsulation type"); 119 return ret; 120 } 121 122 ret = -EOPNOTSUPP; 123 rcu_read_lock(); 124 ops = rcu_dereference(lwtun_encaps[encap_type]); 125 if (likely(ops && ops->build_state && try_module_get(ops->owner))) 126 found = true; 127 rcu_read_unlock(); 128 129 if (found) { 130 ret = ops->build_state(encap, family, cfg, lws, extack); 131 if (ret) 132 module_put(ops->owner); 133 } else { 134 /* don't rely on -EOPNOTSUPP to detect match as build_state 135 * handlers could return it 136 */ 137 NL_SET_ERR_MSG_ATTR(extack, encap, 138 "LWT encapsulation type not supported"); 139 } 140 141 return ret; 142 } 143 EXPORT_SYMBOL_GPL(lwtunnel_build_state); 144 145 int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) 146 { 147 const struct lwtunnel_encap_ops *ops; 148 int ret = -EINVAL; 149 150 if (encap_type == LWTUNNEL_ENCAP_NONE || 151 encap_type > LWTUNNEL_ENCAP_MAX) { 152 NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); 153 return ret; 154 } 155 156 rcu_read_lock(); 157 ops = rcu_dereference(lwtun_encaps[encap_type]); 158 rcu_read_unlock(); 159 #ifdef CONFIG_MODULES 160 if (!ops) { 161 const char *encap_type_str = lwtunnel_encap_str(encap_type); 162 163 if (encap_type_str) { 164 __rtnl_unlock(); 165 request_module("rtnl-lwt-%s", encap_type_str); 166 rtnl_lock(); 167 168 rcu_read_lock(); 169 ops = rcu_dereference(lwtun_encaps[encap_type]); 170 rcu_read_unlock(); 171 } 172 } 173 #endif 174 ret = ops ? 0 : -EOPNOTSUPP; 175 if (ret < 0) 176 NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); 177 178 return ret; 179 } 180 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); 181 182 int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, 183 struct netlink_ext_ack *extack) 184 { 185 struct rtnexthop *rtnh = (struct rtnexthop *)attr; 186 struct nlattr *nla_entype; 187 struct nlattr *attrs; 188 u16 encap_type; 189 int attrlen; 190 191 while (rtnh_ok(rtnh, remaining)) { 192 attrlen = rtnh_attrlen(rtnh); 193 if (attrlen > 0) { 194 attrs = rtnh_attrs(rtnh); 195 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 196 197 if (nla_entype) { 198 encap_type = nla_get_u16(nla_entype); 199 200 if (lwtunnel_valid_encap_type(encap_type, 201 extack) != 0) 202 return -EOPNOTSUPP; 203 } 204 } 205 rtnh = rtnh_next(rtnh, &remaining); 206 } 207 208 return 0; 209 } 210 EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); 211 212 void lwtstate_free(struct lwtunnel_state *lws) 213 { 214 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; 215 216 if (ops->destroy_state) { 217 ops->destroy_state(lws); 218 kfree_rcu(lws, rcu); 219 } else { 220 kfree(lws); 221 } 222 module_put(ops->owner); 223 } 224 EXPORT_SYMBOL_GPL(lwtstate_free); 225 226 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) 227 { 228 const struct lwtunnel_encap_ops *ops; 229 struct nlattr *nest; 230 int ret; 231 232 if (!lwtstate) 233 return 0; 234 235 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 236 lwtstate->type > LWTUNNEL_ENCAP_MAX) 237 return 0; 238 239 nest = nla_nest_start(skb, RTA_ENCAP); 240 if (!nest) 241 return -EMSGSIZE; 242 243 ret = -EOPNOTSUPP; 244 rcu_read_lock(); 245 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 246 if (likely(ops && ops->fill_encap)) 247 ret = ops->fill_encap(skb, lwtstate); 248 rcu_read_unlock(); 249 250 if (ret) 251 goto nla_put_failure; 252 nla_nest_end(skb, nest); 253 ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type); 254 if (ret) 255 goto nla_put_failure; 256 257 return 0; 258 259 nla_put_failure: 260 nla_nest_cancel(skb, nest); 261 262 return (ret == -EOPNOTSUPP ? 0 : ret); 263 } 264 EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); 265 266 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) 267 { 268 const struct lwtunnel_encap_ops *ops; 269 int ret = 0; 270 271 if (!lwtstate) 272 return 0; 273 274 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 275 lwtstate->type > LWTUNNEL_ENCAP_MAX) 276 return 0; 277 278 rcu_read_lock(); 279 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 280 if (likely(ops && ops->get_encap_size)) 281 ret = nla_total_size(ops->get_encap_size(lwtstate)); 282 rcu_read_unlock(); 283 284 return ret; 285 } 286 EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); 287 288 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) 289 { 290 const struct lwtunnel_encap_ops *ops; 291 int ret = 0; 292 293 if (!a && !b) 294 return 0; 295 296 if (!a || !b) 297 return 1; 298 299 if (a->type != b->type) 300 return 1; 301 302 if (a->type == LWTUNNEL_ENCAP_NONE || 303 a->type > LWTUNNEL_ENCAP_MAX) 304 return 0; 305 306 rcu_read_lock(); 307 ops = rcu_dereference(lwtun_encaps[a->type]); 308 if (likely(ops && ops->cmp_encap)) 309 ret = ops->cmp_encap(a, b); 310 rcu_read_unlock(); 311 312 return ret; 313 } 314 EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); 315 316 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) 317 { 318 struct dst_entry *dst = skb_dst(skb); 319 const struct lwtunnel_encap_ops *ops; 320 struct lwtunnel_state *lwtstate; 321 int ret = -EINVAL; 322 323 if (!dst) 324 goto drop; 325 lwtstate = dst->lwtstate; 326 327 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 328 lwtstate->type > LWTUNNEL_ENCAP_MAX) 329 return 0; 330 331 ret = -EOPNOTSUPP; 332 rcu_read_lock(); 333 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 334 if (likely(ops && ops->output)) 335 ret = ops->output(net, sk, skb); 336 rcu_read_unlock(); 337 338 if (ret == -EOPNOTSUPP) 339 goto drop; 340 341 return ret; 342 343 drop: 344 kfree_skb(skb); 345 346 return ret; 347 } 348 EXPORT_SYMBOL_GPL(lwtunnel_output); 349 350 int lwtunnel_xmit(struct sk_buff *skb) 351 { 352 struct dst_entry *dst = skb_dst(skb); 353 const struct lwtunnel_encap_ops *ops; 354 struct lwtunnel_state *lwtstate; 355 int ret = -EINVAL; 356 357 if (!dst) 358 goto drop; 359 360 lwtstate = dst->lwtstate; 361 362 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 363 lwtstate->type > LWTUNNEL_ENCAP_MAX) 364 return 0; 365 366 ret = -EOPNOTSUPP; 367 rcu_read_lock(); 368 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 369 if (likely(ops && ops->xmit)) 370 ret = ops->xmit(skb); 371 rcu_read_unlock(); 372 373 if (ret == -EOPNOTSUPP) 374 goto drop; 375 376 return ret; 377 378 drop: 379 kfree_skb(skb); 380 381 return ret; 382 } 383 EXPORT_SYMBOL_GPL(lwtunnel_xmit); 384 385 int lwtunnel_input(struct sk_buff *skb) 386 { 387 struct dst_entry *dst = skb_dst(skb); 388 const struct lwtunnel_encap_ops *ops; 389 struct lwtunnel_state *lwtstate; 390 int ret = -EINVAL; 391 392 if (!dst) 393 goto drop; 394 lwtstate = dst->lwtstate; 395 396 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 397 lwtstate->type > LWTUNNEL_ENCAP_MAX) 398 return 0; 399 400 ret = -EOPNOTSUPP; 401 rcu_read_lock(); 402 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 403 if (likely(ops && ops->input)) 404 ret = ops->input(skb); 405 rcu_read_unlock(); 406 407 if (ret == -EOPNOTSUPP) 408 goto drop; 409 410 return ret; 411 412 drop: 413 kfree_skb(skb); 414 415 return ret; 416 } 417 EXPORT_SYMBOL_GPL(lwtunnel_input); 418