1 /* 2 * lwtunnel Infrastructure for light weight tunnels like mpls 3 * 4 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 */ 12 13 #include <linux/capability.h> 14 #include <linux/module.h> 15 #include <linux/types.h> 16 #include <linux/kernel.h> 17 #include <linux/slab.h> 18 #include <linux/uaccess.h> 19 #include <linux/skbuff.h> 20 #include <linux/netdevice.h> 21 #include <linux/lwtunnel.h> 22 #include <linux/in.h> 23 #include <linux/init.h> 24 #include <linux/err.h> 25 26 #include <net/lwtunnel.h> 27 #include <net/rtnetlink.h> 28 #include <net/ip6_fib.h> 29 #include <net/nexthop.h> 30 31 #ifdef CONFIG_MODULES 32 33 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) 34 { 35 /* Only lwt encaps implemented without using an interface for 36 * the encap need to return a string here. 37 */ 38 switch (encap_type) { 39 case LWTUNNEL_ENCAP_MPLS: 40 return "MPLS"; 41 case LWTUNNEL_ENCAP_ILA: 42 return "ILA"; 43 case LWTUNNEL_ENCAP_SEG6: 44 return "SEG6"; 45 case LWTUNNEL_ENCAP_BPF: 46 return "BPF"; 47 case LWTUNNEL_ENCAP_IP6: 48 case LWTUNNEL_ENCAP_IP: 49 case LWTUNNEL_ENCAP_NONE: 50 case __LWTUNNEL_ENCAP_MAX: 51 /* should not have got here */ 52 WARN_ON(1); 53 break; 54 } 55 return NULL; 56 } 57 58 #endif /* CONFIG_MODULES */ 59 60 struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 61 { 62 struct lwtunnel_state *lws; 63 64 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); 65 66 return lws; 67 } 68 EXPORT_SYMBOL(lwtunnel_state_alloc); 69 70 static const struct lwtunnel_encap_ops __rcu * 71 lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; 72 73 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, 74 unsigned int num) 75 { 76 if (num > LWTUNNEL_ENCAP_MAX) 77 return -ERANGE; 78 79 return !cmpxchg((const struct lwtunnel_encap_ops **) 80 &lwtun_encaps[num], 81 NULL, ops) ? 0 : -1; 82 } 83 EXPORT_SYMBOL(lwtunnel_encap_add_ops); 84 85 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, 86 unsigned int encap_type) 87 { 88 int ret; 89 90 if (encap_type == LWTUNNEL_ENCAP_NONE || 91 encap_type > LWTUNNEL_ENCAP_MAX) 92 return -ERANGE; 93 94 ret = (cmpxchg((const struct lwtunnel_encap_ops **) 95 &lwtun_encaps[encap_type], 96 ops, NULL) == ops) ? 0 : -1; 97 98 synchronize_net(); 99 100 return ret; 101 } 102 EXPORT_SYMBOL(lwtunnel_encap_del_ops); 103 104 int lwtunnel_build_state(u16 encap_type, 105 struct nlattr *encap, unsigned int family, 106 const void *cfg, struct lwtunnel_state **lws, 107 struct netlink_ext_ack *extack) 108 { 109 const struct lwtunnel_encap_ops *ops; 110 bool found = false; 111 int ret = -EINVAL; 112 113 if (encap_type == LWTUNNEL_ENCAP_NONE || 114 encap_type > LWTUNNEL_ENCAP_MAX) { 115 NL_SET_ERR_MSG_ATTR(extack, encap, 116 "Unknown LWT encapsulation type"); 117 return ret; 118 } 119 120 ret = -EOPNOTSUPP; 121 rcu_read_lock(); 122 ops = rcu_dereference(lwtun_encaps[encap_type]); 123 if (likely(ops && ops->build_state && try_module_get(ops->owner))) { 124 found = true; 125 ret = ops->build_state(encap, family, cfg, lws, extack); 126 if (ret) 127 module_put(ops->owner); 128 } 129 rcu_read_unlock(); 130 131 /* don't rely on -EOPNOTSUPP to detect match as build_state 132 * handlers could return it 133 */ 134 if (!found) { 135 NL_SET_ERR_MSG_ATTR(extack, encap, 136 "LWT encapsulation type not supported"); 137 } 138 139 return ret; 140 } 141 EXPORT_SYMBOL(lwtunnel_build_state); 142 143 int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) 144 { 145 const struct lwtunnel_encap_ops *ops; 146 int ret = -EINVAL; 147 148 if (encap_type == LWTUNNEL_ENCAP_NONE || 149 encap_type > LWTUNNEL_ENCAP_MAX) { 150 NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type"); 151 return ret; 152 } 153 154 rcu_read_lock(); 155 ops = rcu_dereference(lwtun_encaps[encap_type]); 156 rcu_read_unlock(); 157 #ifdef CONFIG_MODULES 158 if (!ops) { 159 const char *encap_type_str = lwtunnel_encap_str(encap_type); 160 161 if (encap_type_str) { 162 __rtnl_unlock(); 163 request_module("rtnl-lwt-%s", encap_type_str); 164 rtnl_lock(); 165 166 rcu_read_lock(); 167 ops = rcu_dereference(lwtun_encaps[encap_type]); 168 rcu_read_unlock(); 169 } 170 } 171 #endif 172 ret = ops ? 0 : -EOPNOTSUPP; 173 if (ret < 0) 174 NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported"); 175 176 return ret; 177 } 178 EXPORT_SYMBOL(lwtunnel_valid_encap_type); 179 180 int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, 181 struct netlink_ext_ack *extack) 182 { 183 struct rtnexthop *rtnh = (struct rtnexthop *)attr; 184 struct nlattr *nla_entype; 185 struct nlattr *attrs; 186 u16 encap_type; 187 int attrlen; 188 189 while (rtnh_ok(rtnh, remaining)) { 190 attrlen = rtnh_attrlen(rtnh); 191 if (attrlen > 0) { 192 attrs = rtnh_attrs(rtnh); 193 nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 194 195 if (nla_entype) { 196 encap_type = nla_get_u16(nla_entype); 197 198 if (lwtunnel_valid_encap_type(encap_type, 199 extack) != 0) 200 return -EOPNOTSUPP; 201 } 202 } 203 rtnh = rtnh_next(rtnh, &remaining); 204 } 205 206 return 0; 207 } 208 EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr); 209 210 void lwtstate_free(struct lwtunnel_state *lws) 211 { 212 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; 213 214 if (ops->destroy_state) { 215 ops->destroy_state(lws); 216 kfree_rcu(lws, rcu); 217 } else { 218 kfree(lws); 219 } 220 module_put(ops->owner); 221 } 222 EXPORT_SYMBOL(lwtstate_free); 223 224 int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) 225 { 226 const struct lwtunnel_encap_ops *ops; 227 struct nlattr *nest; 228 int ret; 229 230 if (!lwtstate) 231 return 0; 232 233 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 234 lwtstate->type > LWTUNNEL_ENCAP_MAX) 235 return 0; 236 237 nest = nla_nest_start(skb, RTA_ENCAP); 238 if (!nest) 239 return -EMSGSIZE; 240 241 ret = -EOPNOTSUPP; 242 rcu_read_lock(); 243 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 244 if (likely(ops && ops->fill_encap)) 245 ret = ops->fill_encap(skb, lwtstate); 246 rcu_read_unlock(); 247 248 if (ret) 249 goto nla_put_failure; 250 nla_nest_end(skb, nest); 251 ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type); 252 if (ret) 253 goto nla_put_failure; 254 255 return 0; 256 257 nla_put_failure: 258 nla_nest_cancel(skb, nest); 259 260 return (ret == -EOPNOTSUPP ? 0 : ret); 261 } 262 EXPORT_SYMBOL(lwtunnel_fill_encap); 263 264 int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) 265 { 266 const struct lwtunnel_encap_ops *ops; 267 int ret = 0; 268 269 if (!lwtstate) 270 return 0; 271 272 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 273 lwtstate->type > LWTUNNEL_ENCAP_MAX) 274 return 0; 275 276 rcu_read_lock(); 277 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 278 if (likely(ops && ops->get_encap_size)) 279 ret = nla_total_size(ops->get_encap_size(lwtstate)); 280 rcu_read_unlock(); 281 282 return ret; 283 } 284 EXPORT_SYMBOL(lwtunnel_get_encap_size); 285 286 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) 287 { 288 const struct lwtunnel_encap_ops *ops; 289 int ret = 0; 290 291 if (!a && !b) 292 return 0; 293 294 if (!a || !b) 295 return 1; 296 297 if (a->type != b->type) 298 return 1; 299 300 if (a->type == LWTUNNEL_ENCAP_NONE || 301 a->type > LWTUNNEL_ENCAP_MAX) 302 return 0; 303 304 rcu_read_lock(); 305 ops = rcu_dereference(lwtun_encaps[a->type]); 306 if (likely(ops && ops->cmp_encap)) 307 ret = ops->cmp_encap(a, b); 308 rcu_read_unlock(); 309 310 return ret; 311 } 312 EXPORT_SYMBOL(lwtunnel_cmp_encap); 313 314 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) 315 { 316 struct dst_entry *dst = skb_dst(skb); 317 const struct lwtunnel_encap_ops *ops; 318 struct lwtunnel_state *lwtstate; 319 int ret = -EINVAL; 320 321 if (!dst) 322 goto drop; 323 lwtstate = dst->lwtstate; 324 325 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 326 lwtstate->type > LWTUNNEL_ENCAP_MAX) 327 return 0; 328 329 ret = -EOPNOTSUPP; 330 rcu_read_lock(); 331 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 332 if (likely(ops && ops->output)) 333 ret = ops->output(net, sk, skb); 334 rcu_read_unlock(); 335 336 if (ret == -EOPNOTSUPP) 337 goto drop; 338 339 return ret; 340 341 drop: 342 kfree_skb(skb); 343 344 return ret; 345 } 346 EXPORT_SYMBOL(lwtunnel_output); 347 348 int lwtunnel_xmit(struct sk_buff *skb) 349 { 350 struct dst_entry *dst = skb_dst(skb); 351 const struct lwtunnel_encap_ops *ops; 352 struct lwtunnel_state *lwtstate; 353 int ret = -EINVAL; 354 355 if (!dst) 356 goto drop; 357 358 lwtstate = dst->lwtstate; 359 360 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 361 lwtstate->type > LWTUNNEL_ENCAP_MAX) 362 return 0; 363 364 ret = -EOPNOTSUPP; 365 rcu_read_lock(); 366 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 367 if (likely(ops && ops->xmit)) 368 ret = ops->xmit(skb); 369 rcu_read_unlock(); 370 371 if (ret == -EOPNOTSUPP) 372 goto drop; 373 374 return ret; 375 376 drop: 377 kfree_skb(skb); 378 379 return ret; 380 } 381 EXPORT_SYMBOL(lwtunnel_xmit); 382 383 int lwtunnel_input(struct sk_buff *skb) 384 { 385 struct dst_entry *dst = skb_dst(skb); 386 const struct lwtunnel_encap_ops *ops; 387 struct lwtunnel_state *lwtstate; 388 int ret = -EINVAL; 389 390 if (!dst) 391 goto drop; 392 lwtstate = dst->lwtstate; 393 394 if (lwtstate->type == LWTUNNEL_ENCAP_NONE || 395 lwtstate->type > LWTUNNEL_ENCAP_MAX) 396 return 0; 397 398 ret = -EOPNOTSUPP; 399 rcu_read_lock(); 400 ops = rcu_dereference(lwtun_encaps[lwtstate->type]); 401 if (likely(ops && ops->input)) 402 ret = ops->input(skb); 403 rcu_read_unlock(); 404 405 if (ret == -EOPNOTSUPP) 406 goto drop; 407 408 return ret; 409 410 drop: 411 kfree_skb(skb); 412 413 return ret; 414 } 415 EXPORT_SYMBOL(lwtunnel_input); 416